1""" 2These the test the public routines exposed in types/common.py 3related to inference and not otherwise tested in types/test_common.py 4 5""" 6import collections 7from collections import namedtuple 8from datetime import date, datetime, time, timedelta 9from decimal import Decimal 10from fractions import Fraction 11from io import StringIO 12from numbers import Number 13import re 14 15import numpy as np 16import pytest 17import pytz 18 19from pandas._libs import lib, missing as libmissing 20import pandas.util._test_decorators as td 21 22from pandas.core.dtypes import inference 23from pandas.core.dtypes.common import ( 24 ensure_int32, 25 is_bool, 26 is_datetime64_any_dtype, 27 is_datetime64_dtype, 28 is_datetime64_ns_dtype, 29 is_datetime64tz_dtype, 30 is_float, 31 is_integer, 32 is_number, 33 is_scalar, 34 is_scipy_sparse, 35 is_timedelta64_dtype, 36 is_timedelta64_ns_dtype, 37) 38 39import pandas as pd 40from pandas import ( 41 Categorical, 42 DataFrame, 43 DateOffset, 44 DatetimeIndex, 45 Index, 46 Interval, 47 Period, 48 PeriodIndex, 49 Series, 50 Timedelta, 51 TimedeltaIndex, 52 Timestamp, 53) 54import pandas._testing as tm 55from pandas.core.arrays import IntegerArray 56 57 58@pytest.fixture(params=[True, False], ids=str) 59def coerce(request): 60 return request.param 61 62 63# collect all objects to be tested for list-like-ness; use tuples of objects, 64# whether they are list-like or not (special casing for sets), and their ID 65ll_params = [ 66 ([1], True, "list"), 67 ([], True, "list-empty"), 68 ((1,), True, "tuple"), 69 ((), True, "tuple-empty"), 70 ({"a": 1}, True, "dict"), 71 ({}, True, "dict-empty"), 72 ({"a", 1}, "set", "set"), 73 (set(), "set", "set-empty"), 74 (frozenset({"a", 1}), "set", "frozenset"), 75 (frozenset(), "set", "frozenset-empty"), 76 (iter([1, 2]), True, "iterator"), 77 (iter([]), True, "iterator-empty"), 78 ((x for x in [1, 2]), True, "generator"), 79 ((_ for _ in []), True, "generator-empty"), 80 (Series([1]), True, "Series"), 81 (Series([], dtype=object), True, "Series-empty"), 82 (Series(["a"]).str, True, "StringMethods"), 83 (Series([], dtype="O").str, True, "StringMethods-empty"), 84 (Index([1]), True, "Index"), 85 (Index([]), True, "Index-empty"), 86 (DataFrame([[1]]), True, "DataFrame"), 87 (DataFrame(), True, "DataFrame-empty"), 88 (np.ndarray((2,) * 1), True, "ndarray-1d"), 89 (np.array([]), True, "ndarray-1d-empty"), 90 (np.ndarray((2,) * 2), True, "ndarray-2d"), 91 (np.array([[]]), True, "ndarray-2d-empty"), 92 (np.ndarray((2,) * 3), True, "ndarray-3d"), 93 (np.array([[[]]]), True, "ndarray-3d-empty"), 94 (np.ndarray((2,) * 4), True, "ndarray-4d"), 95 (np.array([[[[]]]]), True, "ndarray-4d-empty"), 96 (np.array(2), False, "ndarray-0d"), 97 (1, False, "int"), 98 (b"123", False, "bytes"), 99 (b"", False, "bytes-empty"), 100 ("123", False, "string"), 101 ("", False, "string-empty"), 102 (str, False, "string-type"), 103 (object(), False, "object"), 104 (np.nan, False, "NaN"), 105 (None, False, "None"), 106] 107objs, expected, ids = zip(*ll_params) 108 109 110@pytest.fixture(params=zip(objs, expected), ids=ids) 111def maybe_list_like(request): 112 return request.param 113 114 115def test_is_list_like(maybe_list_like): 116 obj, expected = maybe_list_like 117 expected = True if expected == "set" else expected 118 assert inference.is_list_like(obj) == expected 119 120 121def test_is_list_like_disallow_sets(maybe_list_like): 122 obj, expected = maybe_list_like 123 expected = False if expected == "set" else expected 124 assert inference.is_list_like(obj, allow_sets=False) == expected 125 126 127def test_is_list_like_recursion(): 128 # GH 33721 129 # interpreter would crash with SIGABRT 130 def foo(): 131 inference.is_list_like([]) 132 foo() 133 134 with pytest.raises(RecursionError): 135 foo() 136 137 138def test_is_sequence(): 139 is_seq = inference.is_sequence 140 assert is_seq((1, 2)) 141 assert is_seq([1, 2]) 142 assert not is_seq("abcd") 143 assert not is_seq(np.int64) 144 145 class A: 146 def __getitem__(self): 147 return 1 148 149 assert not is_seq(A()) 150 151 152def test_is_array_like(): 153 assert inference.is_array_like(Series([], dtype=object)) 154 assert inference.is_array_like(Series([1, 2])) 155 assert inference.is_array_like(np.array(["a", "b"])) 156 assert inference.is_array_like(Index(["2016-01-01"])) 157 158 class DtypeList(list): 159 dtype = "special" 160 161 assert inference.is_array_like(DtypeList()) 162 163 assert not inference.is_array_like([1, 2, 3]) 164 assert not inference.is_array_like(()) 165 assert not inference.is_array_like("foo") 166 assert not inference.is_array_like(123) 167 168 169@pytest.mark.parametrize( 170 "inner", 171 [ 172 [], 173 [1], 174 (1,), 175 (1, 2), 176 {"a": 1}, 177 {1, "a"}, 178 Series([1]), 179 Series([], dtype=object), 180 Series(["a"]).str, 181 (x for x in range(5)), 182 ], 183) 184@pytest.mark.parametrize("outer", [list, Series, np.array, tuple]) 185def test_is_nested_list_like_passes(inner, outer): 186 result = outer([inner for _ in range(5)]) 187 assert inference.is_list_like(result) 188 189 190@pytest.mark.parametrize( 191 "obj", 192 [ 193 "abc", 194 [], 195 [1], 196 (1,), 197 ["a"], 198 "a", 199 {"a"}, 200 [1, 2, 3], 201 Series([1]), 202 DataFrame({"A": [1]}), 203 ([1, 2] for _ in range(5)), 204 ], 205) 206def test_is_nested_list_like_fails(obj): 207 assert not inference.is_nested_list_like(obj) 208 209 210@pytest.mark.parametrize("ll", [{}, {"A": 1}, Series([1]), collections.defaultdict()]) 211def test_is_dict_like_passes(ll): 212 assert inference.is_dict_like(ll) 213 214 215@pytest.mark.parametrize( 216 "ll", 217 [ 218 "1", 219 1, 220 [1, 2], 221 (1, 2), 222 range(2), 223 Index([1]), 224 dict, 225 collections.defaultdict, 226 Series, 227 ], 228) 229def test_is_dict_like_fails(ll): 230 assert not inference.is_dict_like(ll) 231 232 233@pytest.mark.parametrize("has_keys", [True, False]) 234@pytest.mark.parametrize("has_getitem", [True, False]) 235@pytest.mark.parametrize("has_contains", [True, False]) 236def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains): 237 class DictLike: 238 def __init__(self, d): 239 self.d = d 240 241 if has_keys: 242 243 def keys(self): 244 return self.d.keys() 245 246 if has_getitem: 247 248 def __getitem__(self, key): 249 return self.d.__getitem__(key) 250 251 if has_contains: 252 253 def __contains__(self, key) -> bool: 254 return self.d.__contains__(key) 255 256 d = DictLike({1: 2}) 257 result = inference.is_dict_like(d) 258 expected = has_keys and has_getitem and has_contains 259 260 assert result is expected 261 262 263def test_is_file_like(): 264 class MockFile: 265 pass 266 267 is_file = inference.is_file_like 268 269 data = StringIO("data") 270 assert is_file(data) 271 272 # No read / write attributes 273 # No iterator attributes 274 m = MockFile() 275 assert not is_file(m) 276 277 MockFile.write = lambda self: 0 278 279 # Write attribute but not an iterator 280 m = MockFile() 281 assert not is_file(m) 282 283 # gh-16530: Valid iterator just means we have the 284 # __iter__ attribute for our purposes. 285 MockFile.__iter__ = lambda self: self 286 287 # Valid write-only file 288 m = MockFile() 289 assert is_file(m) 290 291 del MockFile.write 292 MockFile.read = lambda self: 0 293 294 # Valid read-only file 295 m = MockFile() 296 assert is_file(m) 297 298 # Iterator but no read / write attributes 299 data = [1, 2, 3] 300 assert not is_file(data) 301 302 303test_tuple = collections.namedtuple("Test", ["a", "b", "c"]) 304 305 306@pytest.mark.parametrize("ll", [test_tuple(1, 2, 3)]) 307def test_is_names_tuple_passes(ll): 308 assert inference.is_named_tuple(ll) 309 310 311@pytest.mark.parametrize("ll", [(1, 2, 3), "a", Series({"pi": 3.14})]) 312def test_is_names_tuple_fails(ll): 313 assert not inference.is_named_tuple(ll) 314 315 316def test_is_hashable(): 317 318 # all new-style classes are hashable by default 319 class HashableClass: 320 pass 321 322 class UnhashableClass1: 323 __hash__ = None 324 325 class UnhashableClass2: 326 def __hash__(self): 327 raise TypeError("Not hashable") 328 329 hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass()) 330 not_hashable = ([], UnhashableClass1()) 331 abc_hashable_not_really_hashable = (([],), UnhashableClass2()) 332 333 for i in hashable: 334 assert inference.is_hashable(i) 335 for i in not_hashable: 336 assert not inference.is_hashable(i) 337 for i in abc_hashable_not_really_hashable: 338 assert not inference.is_hashable(i) 339 340 # numpy.array is no longer collections.abc.Hashable as of 341 # https://github.com/numpy/numpy/pull/5326, just test 342 # is_hashable() 343 assert not inference.is_hashable(np.array([])) 344 345 346@pytest.mark.parametrize("ll", [re.compile("ad")]) 347def test_is_re_passes(ll): 348 assert inference.is_re(ll) 349 350 351@pytest.mark.parametrize("ll", ["x", 2, 3, object()]) 352def test_is_re_fails(ll): 353 assert not inference.is_re(ll) 354 355 356@pytest.mark.parametrize( 357 "ll", [r"a", "x", r"asdf", re.compile("adsf"), r"\u2233\s*", re.compile(r"")] 358) 359def test_is_recompilable_passes(ll): 360 assert inference.is_re_compilable(ll) 361 362 363@pytest.mark.parametrize("ll", [1, [], object()]) 364def test_is_recompilable_fails(ll): 365 assert not inference.is_re_compilable(ll) 366 367 368class TestInference: 369 @pytest.mark.parametrize( 370 "arr", 371 [ 372 np.array(list("abc"), dtype="S1"), 373 np.array(list("abc"), dtype="S1").astype(object), 374 [b"a", np.nan, b"c"], 375 ], 376 ) 377 def test_infer_dtype_bytes(self, arr): 378 result = lib.infer_dtype(arr, skipna=True) 379 assert result == "bytes" 380 381 @pytest.mark.parametrize( 382 "value, expected", 383 [ 384 (float("inf"), True), 385 (np.inf, True), 386 (-np.inf, False), 387 (1, False), 388 ("a", False), 389 ], 390 ) 391 def test_isposinf_scalar(self, value, expected): 392 # GH 11352 393 result = libmissing.isposinf_scalar(value) 394 assert result is expected 395 396 @pytest.mark.parametrize( 397 "value, expected", 398 [ 399 (float("-inf"), True), 400 (-np.inf, True), 401 (np.inf, False), 402 (1, False), 403 ("a", False), 404 ], 405 ) 406 def test_isneginf_scalar(self, value, expected): 407 result = libmissing.isneginf_scalar(value) 408 assert result is expected 409 410 @pytest.mark.parametrize("coerce_numeric", [True, False]) 411 @pytest.mark.parametrize( 412 "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"] 413 ) 414 @pytest.mark.parametrize("prefix", ["", "-", "+"]) 415 def test_maybe_convert_numeric_infinities(self, coerce_numeric, infinity, prefix): 416 # see gh-13274 417 result = lib.maybe_convert_numeric( 418 np.array([prefix + infinity], dtype=object), 419 na_values={"", "NULL", "nan"}, 420 coerce_numeric=coerce_numeric, 421 ) 422 expected = np.array([np.inf if prefix in ["", "+"] else -np.inf]) 423 tm.assert_numpy_array_equal(result, expected) 424 425 def test_maybe_convert_numeric_infinities_raises(self): 426 msg = "Unable to parse string" 427 with pytest.raises(ValueError, match=msg): 428 lib.maybe_convert_numeric( 429 np.array(["foo_inf"], dtype=object), 430 na_values={"", "NULL", "nan"}, 431 coerce_numeric=False, 432 ) 433 434 def test_maybe_convert_numeric_post_floatify_nan(self, coerce): 435 # see gh-13314 436 data = np.array(["1.200", "-999.000", "4.500"], dtype=object) 437 expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) 438 nan_values = {-999, -999.0} 439 440 out = lib.maybe_convert_numeric(data, nan_values, coerce) 441 tm.assert_numpy_array_equal(out, expected) 442 443 def test_convert_infs(self): 444 arr = np.array(["inf", "inf", "inf"], dtype="O") 445 result = lib.maybe_convert_numeric(arr, set(), False) 446 assert result.dtype == np.float64 447 448 arr = np.array(["-inf", "-inf", "-inf"], dtype="O") 449 result = lib.maybe_convert_numeric(arr, set(), False) 450 assert result.dtype == np.float64 451 452 def test_scientific_no_exponent(self): 453 # See PR 12215 454 arr = np.array(["42E", "2E", "99e", "6e"], dtype="O") 455 result = lib.maybe_convert_numeric(arr, set(), False, True) 456 assert np.all(np.isnan(result)) 457 458 def test_convert_non_hashable(self): 459 # GH13324 460 # make sure that we are handing non-hashables 461 arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object) 462 result = lib.maybe_convert_numeric(arr, set(), False, True) 463 tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) 464 465 def test_convert_numeric_uint64(self): 466 arr = np.array([2 ** 63], dtype=object) 467 exp = np.array([2 ** 63], dtype=np.uint64) 468 tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) 469 470 arr = np.array([str(2 ** 63)], dtype=object) 471 exp = np.array([2 ** 63], dtype=np.uint64) 472 tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) 473 474 arr = np.array([np.uint64(2 ** 63)], dtype=object) 475 exp = np.array([2 ** 63], dtype=np.uint64) 476 tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) 477 478 @pytest.mark.parametrize( 479 "arr", 480 [ 481 np.array([2 ** 63, np.nan], dtype=object), 482 np.array([str(2 ** 63), np.nan], dtype=object), 483 np.array([np.nan, 2 ** 63], dtype=object), 484 np.array([np.nan, str(2 ** 63)], dtype=object), 485 ], 486 ) 487 def test_convert_numeric_uint64_nan(self, coerce, arr): 488 expected = arr.astype(float) if coerce else arr.copy() 489 result = lib.maybe_convert_numeric(arr, set(), coerce_numeric=coerce) 490 tm.assert_almost_equal(result, expected) 491 492 def test_convert_numeric_uint64_nan_values(self, coerce): 493 arr = np.array([2 ** 63, 2 ** 63 + 1], dtype=object) 494 na_values = {2 ** 63} 495 496 expected = ( 497 np.array([np.nan, 2 ** 63 + 1], dtype=float) if coerce else arr.copy() 498 ) 499 result = lib.maybe_convert_numeric(arr, na_values, coerce_numeric=coerce) 500 tm.assert_almost_equal(result, expected) 501 502 @pytest.mark.parametrize( 503 "case", 504 [ 505 np.array([2 ** 63, -1], dtype=object), 506 np.array([str(2 ** 63), -1], dtype=object), 507 np.array([str(2 ** 63), str(-1)], dtype=object), 508 np.array([-1, 2 ** 63], dtype=object), 509 np.array([-1, str(2 ** 63)], dtype=object), 510 np.array([str(-1), str(2 ** 63)], dtype=object), 511 ], 512 ) 513 def test_convert_numeric_int64_uint64(self, case, coerce): 514 expected = case.astype(float) if coerce else case.copy() 515 result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) 516 tm.assert_almost_equal(result, expected) 517 518 def test_convert_numeric_string_uint64(self): 519 # GH32394 520 result = lib.maybe_convert_numeric( 521 np.array(["uint64"], dtype=object), set(), coerce_numeric=True 522 ) 523 assert np.isnan(result) 524 525 @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) 526 def test_convert_int_overflow(self, value): 527 # see gh-18584 528 arr = np.array([value], dtype=object) 529 result = lib.maybe_convert_objects(arr) 530 tm.assert_numpy_array_equal(arr, result) 531 532 def test_maybe_convert_objects_uint64(self): 533 # see gh-4471 534 arr = np.array([2 ** 63], dtype=object) 535 exp = np.array([2 ** 63], dtype=np.uint64) 536 tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) 537 538 # NumPy bug: can't compare uint64 to int64, as that 539 # results in both casting to float64, so we should 540 # make sure that this function is robust against it 541 arr = np.array([np.uint64(2 ** 63)], dtype=object) 542 exp = np.array([2 ** 63], dtype=np.uint64) 543 tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) 544 545 arr = np.array([2, -1], dtype=object) 546 exp = np.array([2, -1], dtype=np.int64) 547 tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) 548 549 arr = np.array([2 ** 63, -1], dtype=object) 550 exp = np.array([2 ** 63, -1], dtype=object) 551 tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) 552 553 def test_maybe_convert_objects_datetime(self): 554 # GH27438 555 arr = np.array( 556 [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object 557 ) 558 exp = arr.copy() 559 out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) 560 tm.assert_numpy_array_equal(out, exp) 561 562 arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) 563 exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") 564 out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) 565 tm.assert_numpy_array_equal(out, exp) 566 567 arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) 568 exp = arr.copy() 569 out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) 570 tm.assert_numpy_array_equal(out, exp) 571 572 @pytest.mark.parametrize( 573 "exp", 574 [ 575 IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])), 576 IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])), 577 ], 578 ) 579 def test_maybe_convert_objects_nullable_integer(self, exp): 580 # GH27335 581 arr = np.array([2, np.NaN], dtype=object) 582 result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) 583 584 tm.assert_extension_array_equal(result, exp) 585 586 def test_maybe_convert_objects_bool_nan(self): 587 # GH32146 588 ind = Index([True, False, np.nan], dtype=object) 589 exp = np.array([True, False, np.nan], dtype=object) 590 out = lib.maybe_convert_objects(ind.values, safe=1) 591 tm.assert_numpy_array_equal(out, exp) 592 593 def test_mixed_dtypes_remain_object_array(self): 594 # GH14956 595 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) 596 result = lib.maybe_convert_objects(array, convert_datetime=1) 597 tm.assert_numpy_array_equal(result, array) 598 599 600class TestTypeInference: 601 602 # Dummy class used for testing with Python objects 603 class Dummy: 604 pass 605 606 def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype): 607 # see pandas/conftest.py 608 inferred_dtype, values = any_skipna_inferred_dtype 609 610 # make sure the inferred dtype of the fixture is as requested 611 assert inferred_dtype == lib.infer_dtype(values, skipna=True) 612 613 @pytest.mark.parametrize("skipna", [True, False]) 614 def test_length_zero(self, skipna): 615 result = lib.infer_dtype(np.array([], dtype="i4"), skipna=skipna) 616 assert result == "integer" 617 618 result = lib.infer_dtype([], skipna=skipna) 619 assert result == "empty" 620 621 # GH 18004 622 arr = np.array([np.array([], dtype=object), np.array([], dtype=object)]) 623 result = lib.infer_dtype(arr, skipna=skipna) 624 assert result == "empty" 625 626 def test_integers(self): 627 arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O") 628 result = lib.infer_dtype(arr, skipna=True) 629 assert result == "integer" 630 631 arr = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O") 632 result = lib.infer_dtype(arr, skipna=True) 633 assert result == "mixed-integer" 634 635 arr = np.array([1, 2, 3, 4, 5], dtype="i4") 636 result = lib.infer_dtype(arr, skipna=True) 637 assert result == "integer" 638 639 @pytest.mark.parametrize( 640 "arr, skipna", 641 [ 642 (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), 643 (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), 644 (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), 645 (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), 646 ], 647 ) 648 def test_integer_na(self, arr, skipna): 649 # GH 27392 650 result = lib.infer_dtype(arr, skipna=skipna) 651 expected = "integer" if skipna else "integer-na" 652 assert result == expected 653 654 def test_infer_dtype_skipna_default(self): 655 # infer_dtype `skipna` default deprecated in GH#24050, 656 # changed to True in GH#29876 657 arr = np.array([1, 2, 3, np.nan], dtype=object) 658 659 result = lib.infer_dtype(arr) 660 assert result == "integer" 661 662 def test_bools(self): 663 arr = np.array([True, False, True, True, True], dtype="O") 664 result = lib.infer_dtype(arr, skipna=True) 665 assert result == "boolean" 666 667 arr = np.array([np.bool_(True), np.bool_(False)], dtype="O") 668 result = lib.infer_dtype(arr, skipna=True) 669 assert result == "boolean" 670 671 arr = np.array([True, False, True, "foo"], dtype="O") 672 result = lib.infer_dtype(arr, skipna=True) 673 assert result == "mixed" 674 675 arr = np.array([True, False, True], dtype=bool) 676 result = lib.infer_dtype(arr, skipna=True) 677 assert result == "boolean" 678 679 arr = np.array([True, np.nan, False], dtype="O") 680 result = lib.infer_dtype(arr, skipna=True) 681 assert result == "boolean" 682 683 result = lib.infer_dtype(arr, skipna=False) 684 assert result == "mixed" 685 686 def test_floats(self): 687 arr = np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O") 688 result = lib.infer_dtype(arr, skipna=True) 689 assert result == "floating" 690 691 arr = np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O") 692 result = lib.infer_dtype(arr, skipna=True) 693 assert result == "mixed-integer" 694 695 arr = np.array([1, 2, 3, 4, 5], dtype="f4") 696 result = lib.infer_dtype(arr, skipna=True) 697 assert result == "floating" 698 699 arr = np.array([1, 2, 3, 4, 5], dtype="f8") 700 result = lib.infer_dtype(arr, skipna=True) 701 assert result == "floating" 702 703 def test_decimals(self): 704 # GH15690 705 arr = np.array([Decimal(1), Decimal(2), Decimal(3)]) 706 result = lib.infer_dtype(arr, skipna=True) 707 assert result == "decimal" 708 709 arr = np.array([1.0, 2.0, Decimal(3)]) 710 result = lib.infer_dtype(arr, skipna=True) 711 assert result == "mixed" 712 713 result = lib.infer_dtype(arr[::-1], skipna=True) 714 assert result == "mixed" 715 716 arr = np.array([Decimal(1), Decimal("NaN"), Decimal(3)]) 717 result = lib.infer_dtype(arr, skipna=True) 718 assert result == "decimal" 719 720 arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype="O") 721 result = lib.infer_dtype(arr, skipna=True) 722 assert result == "decimal" 723 724 # complex is compatible with nan, so skipna has no effect 725 @pytest.mark.parametrize("skipna", [True, False]) 726 def test_complex(self, skipna): 727 # gets cast to complex on array construction 728 arr = np.array([1.0, 2.0, 1 + 1j]) 729 result = lib.infer_dtype(arr, skipna=skipna) 730 assert result == "complex" 731 732 arr = np.array([1.0, 2.0, 1 + 1j], dtype="O") 733 result = lib.infer_dtype(arr, skipna=skipna) 734 assert result == "mixed" 735 736 result = lib.infer_dtype(arr[::-1], skipna=skipna) 737 assert result == "mixed" 738 739 # gets cast to complex on array construction 740 arr = np.array([1, np.nan, 1 + 1j]) 741 result = lib.infer_dtype(arr, skipna=skipna) 742 assert result == "complex" 743 744 arr = np.array([1.0, np.nan, 1 + 1j], dtype="O") 745 result = lib.infer_dtype(arr, skipna=skipna) 746 assert result == "mixed" 747 748 # complex with nans stays complex 749 arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype="O") 750 result = lib.infer_dtype(arr, skipna=skipna) 751 assert result == "complex" 752 753 # test smaller complex dtype; will pass through _try_infer_map fastpath 754 arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype=np.complex64) 755 result = lib.infer_dtype(arr, skipna=skipna) 756 assert result == "complex" 757 758 def test_string(self): 759 pass 760 761 def test_unicode(self): 762 arr = ["a", np.nan, "c"] 763 result = lib.infer_dtype(arr, skipna=False) 764 # This currently returns "mixed", but it's not clear that's optimal. 765 # This could also return "string" or "mixed-string" 766 assert result == "mixed" 767 768 arr = ["a", np.nan, "c"] 769 result = lib.infer_dtype(arr, skipna=True) 770 assert result == "string" 771 772 arr = ["a", "c"] 773 result = lib.infer_dtype(arr, skipna=False) 774 assert result == "string" 775 776 @pytest.mark.parametrize( 777 "dtype, missing, skipna, expected", 778 [ 779 (float, np.nan, False, "floating"), 780 (float, np.nan, True, "floating"), 781 (object, np.nan, False, "floating"), 782 (object, np.nan, True, "empty"), 783 (object, None, False, "mixed"), 784 (object, None, True, "empty"), 785 ], 786 ) 787 @pytest.mark.parametrize("box", [pd.Series, np.array]) 788 def test_object_empty(self, box, missing, dtype, skipna, expected): 789 # GH 23421 790 arr = box([missing, missing], dtype=dtype) 791 792 result = lib.infer_dtype(arr, skipna=skipna) 793 assert result == expected 794 795 def test_datetime(self): 796 797 dates = [datetime(2012, 1, x) for x in range(1, 20)] 798 index = Index(dates) 799 assert index.inferred_type == "datetime64" 800 801 def test_infer_dtype_datetime64(self): 802 arr = np.array( 803 [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object 804 ) 805 assert lib.infer_dtype(arr, skipna=True) == "datetime64" 806 807 @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) 808 def test_infer_dtype_datetime64_with_na(self, na_value): 809 # starts with nan 810 arr = np.array([na_value, np.datetime64("2011-01-02")]) 811 assert lib.infer_dtype(arr, skipna=True) == "datetime64" 812 813 arr = np.array([na_value, np.datetime64("2011-01-02"), na_value]) 814 assert lib.infer_dtype(arr, skipna=True) == "datetime64" 815 816 @pytest.mark.parametrize( 817 "arr", 818 [ 819 np.array( 820 [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object 821 ), 822 np.array( 823 [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object 824 ), 825 np.array([np.datetime64("2011-01-01"), Timestamp("2011-01-02")]), 826 np.array([Timestamp("2011-01-02"), np.datetime64("2011-01-01")]), 827 np.array([np.nan, Timestamp("2011-01-02"), 1.1]), 828 np.array([np.nan, "2011-01-01", Timestamp("2011-01-02")]), 829 np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object), 830 np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object), 831 ], 832 ) 833 def test_infer_datetimelike_dtype_mixed(self, arr): 834 assert lib.infer_dtype(arr, skipna=False) == "mixed" 835 836 def test_infer_dtype_mixed_integer(self): 837 arr = np.array([np.nan, Timestamp("2011-01-02"), 1]) 838 assert lib.infer_dtype(arr, skipna=True) == "mixed-integer" 839 840 @pytest.mark.parametrize( 841 "arr", 842 [ 843 np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]), 844 np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]), 845 np.array([datetime(2011, 1, 1), Timestamp("2011-01-02")]), 846 ], 847 ) 848 def test_infer_dtype_datetime(self, arr): 849 assert lib.infer_dtype(arr, skipna=True) == "datetime" 850 851 @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) 852 @pytest.mark.parametrize( 853 "time_stamp", [Timestamp("2011-01-01"), datetime(2011, 1, 1)] 854 ) 855 def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): 856 # starts with nan 857 arr = np.array([na_value, time_stamp]) 858 assert lib.infer_dtype(arr, skipna=True) == "datetime" 859 860 arr = np.array([na_value, time_stamp, na_value]) 861 assert lib.infer_dtype(arr, skipna=True) == "datetime" 862 863 @pytest.mark.parametrize( 864 "arr", 865 [ 866 np.array([Timedelta("1 days"), Timedelta("2 days")]), 867 np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), 868 np.array([timedelta(1), timedelta(2)]), 869 ], 870 ) 871 def test_infer_dtype_timedelta(self, arr): 872 assert lib.infer_dtype(arr, skipna=True) == "timedelta" 873 874 @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) 875 @pytest.mark.parametrize( 876 "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)] 877 ) 878 def test_infer_dtype_timedelta_with_na(self, na_value, delta): 879 # starts with nan 880 arr = np.array([na_value, delta]) 881 assert lib.infer_dtype(arr, skipna=True) == "timedelta" 882 883 arr = np.array([na_value, delta, na_value]) 884 assert lib.infer_dtype(arr, skipna=True) == "timedelta" 885 886 def test_infer_dtype_period(self): 887 # GH 13664 888 arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="D")]) 889 assert lib.infer_dtype(arr, skipna=True) == "period" 890 891 arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="M")]) 892 assert lib.infer_dtype(arr, skipna=True) == "period" 893 894 def test_infer_dtype_period_mixed(self): 895 arr = np.array( 896 [Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object 897 ) 898 assert lib.infer_dtype(arr, skipna=False) == "mixed" 899 900 arr = np.array( 901 [np.datetime64("nat"), Period("2011-01", freq="M")], dtype=object 902 ) 903 assert lib.infer_dtype(arr, skipna=False) == "mixed" 904 905 @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) 906 def test_infer_dtype_period_with_na(self, na_value): 907 # starts with nan 908 arr = np.array([na_value, Period("2011-01", freq="D")]) 909 assert lib.infer_dtype(arr, skipna=True) == "period" 910 911 arr = np.array([na_value, Period("2011-01", freq="D"), na_value]) 912 assert lib.infer_dtype(arr, skipna=True) == "period" 913 914 @pytest.mark.parametrize( 915 "data", 916 [ 917 [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], 918 [Timestamp("20170612"), Timestamp("20170311")], 919 [ 920 Timestamp("20170612", tz="US/Eastern"), 921 Timestamp("20170311", tz="US/Eastern"), 922 ], 923 [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], 924 [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], 925 [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)], 926 ], 927 ) 928 def test_infer_datetimelike_array_datetime(self, data): 929 assert lib.infer_datetimelike_array(data) == "datetime" 930 931 @pytest.mark.parametrize( 932 "data", 933 [ 934 [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], 935 [timedelta(2017, 6, 12), date(2017, 3, 11)], 936 [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], 937 [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)], 938 ], 939 ) 940 def test_infer_datetimelike_array_timedelta(self, data): 941 assert lib.infer_datetimelike_array(data) == "timedelta" 942 943 def test_infer_datetimelike_array_date(self): 944 arr = [date(2017, 6, 12), date(2017, 3, 11)] 945 assert lib.infer_datetimelike_array(arr) == "date" 946 947 @pytest.mark.parametrize( 948 "data", 949 [ 950 ["2017-06-12", "2017-03-11"], 951 [20170612, 20170311], 952 [20170612.5, 20170311.8], 953 [Dummy(), Dummy()], 954 [Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")], 955 [Timestamp("20170612"), 20170311], 956 [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], 957 ], 958 ) 959 def test_infer_datetimelike_array_mixed(self, data): 960 assert lib.infer_datetimelike_array(data) == "mixed" 961 962 @pytest.mark.parametrize( 963 "first, expected", 964 [ 965 [[None], "mixed"], 966 [[np.nan], "mixed"], 967 [[pd.NaT], "nat"], 968 [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], 969 [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], 970 [[date(2017, 6, 12), pd.NaT], "date"], 971 [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], 972 [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"], 973 ], 974 ) 975 @pytest.mark.parametrize("second", [None, np.nan]) 976 def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected): 977 first.append(second) 978 assert lib.infer_datetimelike_array(first) == expected 979 980 def test_infer_dtype_all_nan_nat_like(self): 981 arr = np.array([np.nan, np.nan]) 982 assert lib.infer_dtype(arr, skipna=True) == "floating" 983 984 # nan and None mix are result in mixed 985 arr = np.array([np.nan, np.nan, None]) 986 assert lib.infer_dtype(arr, skipna=True) == "empty" 987 assert lib.infer_dtype(arr, skipna=False) == "mixed" 988 989 arr = np.array([None, np.nan, np.nan]) 990 assert lib.infer_dtype(arr, skipna=True) == "empty" 991 assert lib.infer_dtype(arr, skipna=False) == "mixed" 992 993 # pd.NaT 994 arr = np.array([pd.NaT]) 995 assert lib.infer_dtype(arr, skipna=False) == "datetime" 996 997 arr = np.array([pd.NaT, np.nan]) 998 assert lib.infer_dtype(arr, skipna=False) == "datetime" 999 1000 arr = np.array([np.nan, pd.NaT]) 1001 assert lib.infer_dtype(arr, skipna=False) == "datetime" 1002 1003 arr = np.array([np.nan, pd.NaT, np.nan]) 1004 assert lib.infer_dtype(arr, skipna=False) == "datetime" 1005 1006 arr = np.array([None, pd.NaT, None]) 1007 assert lib.infer_dtype(arr, skipna=False) == "datetime" 1008 1009 # np.datetime64(nat) 1010 arr = np.array([np.datetime64("nat")]) 1011 assert lib.infer_dtype(arr, skipna=False) == "datetime64" 1012 1013 for n in [np.nan, pd.NaT, None]: 1014 arr = np.array([n, np.datetime64("nat"), n]) 1015 assert lib.infer_dtype(arr, skipna=False) == "datetime64" 1016 1017 arr = np.array([pd.NaT, n, np.datetime64("nat"), n]) 1018 assert lib.infer_dtype(arr, skipna=False) == "datetime64" 1019 1020 arr = np.array([np.timedelta64("nat")], dtype=object) 1021 assert lib.infer_dtype(arr, skipna=False) == "timedelta" 1022 1023 for n in [np.nan, pd.NaT, None]: 1024 arr = np.array([n, np.timedelta64("nat"), n]) 1025 assert lib.infer_dtype(arr, skipna=False) == "timedelta" 1026 1027 arr = np.array([pd.NaT, n, np.timedelta64("nat"), n]) 1028 assert lib.infer_dtype(arr, skipna=False) == "timedelta" 1029 1030 # datetime / timedelta mixed 1031 arr = np.array([pd.NaT, np.datetime64("nat"), np.timedelta64("nat"), np.nan]) 1032 assert lib.infer_dtype(arr, skipna=False) == "mixed" 1033 1034 arr = np.array([np.timedelta64("nat"), np.datetime64("nat")], dtype=object) 1035 assert lib.infer_dtype(arr, skipna=False) == "mixed" 1036 1037 def test_is_datetimelike_array_all_nan_nat_like(self): 1038 arr = np.array([np.nan, pd.NaT, np.datetime64("nat")]) 1039 assert lib.is_datetime_array(arr) 1040 assert lib.is_datetime64_array(arr) 1041 assert not lib.is_timedelta_or_timedelta64_array(arr) 1042 1043 arr = np.array([np.nan, pd.NaT, np.timedelta64("nat")]) 1044 assert not lib.is_datetime_array(arr) 1045 assert not lib.is_datetime64_array(arr) 1046 assert lib.is_timedelta_or_timedelta64_array(arr) 1047 1048 arr = np.array([np.nan, pd.NaT, np.datetime64("nat"), np.timedelta64("nat")]) 1049 assert not lib.is_datetime_array(arr) 1050 assert not lib.is_datetime64_array(arr) 1051 assert not lib.is_timedelta_or_timedelta64_array(arr) 1052 1053 arr = np.array([np.nan, pd.NaT]) 1054 assert lib.is_datetime_array(arr) 1055 assert lib.is_datetime64_array(arr) 1056 assert lib.is_timedelta_or_timedelta64_array(arr) 1057 1058 arr = np.array([np.nan, np.nan], dtype=object) 1059 assert not lib.is_datetime_array(arr) 1060 assert not lib.is_datetime64_array(arr) 1061 assert not lib.is_timedelta_or_timedelta64_array(arr) 1062 1063 assert lib.is_datetime_with_singletz_array( 1064 np.array( 1065 [ 1066 Timestamp("20130101", tz="US/Eastern"), 1067 Timestamp("20130102", tz="US/Eastern"), 1068 ], 1069 dtype=object, 1070 ) 1071 ) 1072 assert not lib.is_datetime_with_singletz_array( 1073 np.array( 1074 [ 1075 Timestamp("20130101", tz="US/Eastern"), 1076 Timestamp("20130102", tz="CET"), 1077 ], 1078 dtype=object, 1079 ) 1080 ) 1081 1082 @pytest.mark.parametrize( 1083 "func", 1084 [ 1085 "is_datetime_array", 1086 "is_datetime64_array", 1087 "is_bool_array", 1088 "is_timedelta_or_timedelta64_array", 1089 "is_date_array", 1090 "is_time_array", 1091 "is_interval_array", 1092 "is_period_array", 1093 ], 1094 ) 1095 def test_other_dtypes_for_array(self, func): 1096 func = getattr(lib, func) 1097 arr = np.array(["foo", "bar"]) 1098 assert not func(arr) 1099 1100 arr = np.array([1, 2]) 1101 assert not func(arr) 1102 1103 def test_date(self): 1104 1105 dates = [date(2012, 1, day) for day in range(1, 20)] 1106 index = Index(dates) 1107 assert index.inferred_type == "date" 1108 1109 dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] 1110 result = lib.infer_dtype(dates, skipna=False) 1111 assert result == "mixed" 1112 1113 result = lib.infer_dtype(dates, skipna=True) 1114 assert result == "date" 1115 1116 @pytest.mark.parametrize( 1117 "values", 1118 [ 1119 [date(2020, 1, 1), Timestamp("2020-01-01")], 1120 [Timestamp("2020-01-01"), date(2020, 1, 1)], 1121 [date(2020, 1, 1), pd.NaT], 1122 [pd.NaT, date(2020, 1, 1)], 1123 ], 1124 ) 1125 @pytest.mark.parametrize("skipna", [True, False]) 1126 def test_infer_dtype_date_order_invariant(self, values, skipna): 1127 # https://github.com/pandas-dev/pandas/issues/33741 1128 result = lib.infer_dtype(values, skipna=skipna) 1129 assert result == "date" 1130 1131 def test_is_numeric_array(self): 1132 1133 assert lib.is_float_array(np.array([1, 2.0])) 1134 assert lib.is_float_array(np.array([1, 2.0, np.nan])) 1135 assert not lib.is_float_array(np.array([1, 2])) 1136 1137 assert lib.is_integer_array(np.array([1, 2])) 1138 assert not lib.is_integer_array(np.array([1, 2.0])) 1139 1140 def test_is_string_array(self): 1141 1142 assert lib.is_string_array(np.array(["foo", "bar"])) 1143 assert not lib.is_string_array( 1144 np.array(["foo", "bar", pd.NA], dtype=object), skipna=False 1145 ) 1146 assert lib.is_string_array( 1147 np.array(["foo", "bar", pd.NA], dtype=object), skipna=True 1148 ) 1149 # NaN is not valid for string array, just NA 1150 assert not lib.is_string_array( 1151 np.array(["foo", "bar", np.nan], dtype=object), skipna=True 1152 ) 1153 1154 assert not lib.is_string_array(np.array([1, 2])) 1155 1156 def test_to_object_array_tuples(self): 1157 r = (5, 6) 1158 values = [r] 1159 lib.to_object_array_tuples(values) 1160 1161 # make sure record array works 1162 record = namedtuple("record", "x y") 1163 r = record(5, 6) 1164 values = [r] 1165 lib.to_object_array_tuples(values) 1166 1167 def test_object(self): 1168 1169 # GH 7431 1170 # cannot infer more than this as only a single element 1171 arr = np.array([None], dtype="O") 1172 result = lib.infer_dtype(arr, skipna=False) 1173 assert result == "mixed" 1174 result = lib.infer_dtype(arr, skipna=True) 1175 assert result == "empty" 1176 1177 def test_to_object_array_width(self): 1178 # see gh-13320 1179 rows = [[1, 2, 3], [4, 5, 6]] 1180 1181 expected = np.array(rows, dtype=object) 1182 out = lib.to_object_array(rows) 1183 tm.assert_numpy_array_equal(out, expected) 1184 1185 expected = np.array(rows, dtype=object) 1186 out = lib.to_object_array(rows, min_width=1) 1187 tm.assert_numpy_array_equal(out, expected) 1188 1189 expected = np.array( 1190 [[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object 1191 ) 1192 out = lib.to_object_array(rows, min_width=5) 1193 tm.assert_numpy_array_equal(out, expected) 1194 1195 def test_is_period(self): 1196 assert lib.is_period(Period("2011-01", freq="M")) 1197 assert not lib.is_period(PeriodIndex(["2011-01"], freq="M")) 1198 assert not lib.is_period(Timestamp("2011-01")) 1199 assert not lib.is_period(1) 1200 assert not lib.is_period(np.nan) 1201 1202 def test_categorical(self): 1203 1204 # GH 8974 1205 arr = Categorical(list("abc")) 1206 result = lib.infer_dtype(arr, skipna=True) 1207 assert result == "categorical" 1208 1209 result = lib.infer_dtype(Series(arr), skipna=True) 1210 assert result == "categorical" 1211 1212 arr = Categorical(list("abc"), categories=["cegfab"], ordered=True) 1213 result = lib.infer_dtype(arr, skipna=True) 1214 assert result == "categorical" 1215 1216 result = lib.infer_dtype(Series(arr), skipna=True) 1217 assert result == "categorical" 1218 1219 def test_interval(self): 1220 idx = pd.IntervalIndex.from_breaks(range(5), closed="both") 1221 inferred = lib.infer_dtype(idx, skipna=False) 1222 assert inferred == "interval" 1223 1224 inferred = lib.infer_dtype(idx._data, skipna=False) 1225 assert inferred == "interval" 1226 1227 inferred = lib.infer_dtype(Series(idx), skipna=False) 1228 assert inferred == "interval" 1229 1230 @pytest.mark.parametrize("klass", [pd.array, pd.Series]) 1231 @pytest.mark.parametrize("skipna", [True, False]) 1232 @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]]) 1233 def test_string_dtype(self, data, skipna, klass): 1234 # StringArray 1235 val = klass(data, dtype="string") 1236 inferred = lib.infer_dtype(val, skipna=skipna) 1237 assert inferred == "string" 1238 1239 @pytest.mark.parametrize("klass", [pd.array, pd.Series]) 1240 @pytest.mark.parametrize("skipna", [True, False]) 1241 @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]]) 1242 def test_boolean_dtype(self, data, skipna, klass): 1243 # BooleanArray 1244 val = klass(data, dtype="boolean") 1245 inferred = lib.infer_dtype(val, skipna=skipna) 1246 assert inferred == "boolean" 1247 1248 1249class TestNumberScalar: 1250 def test_is_number(self): 1251 1252 assert is_number(True) 1253 assert is_number(1) 1254 assert is_number(1.1) 1255 assert is_number(1 + 3j) 1256 assert is_number(np.int64(1)) 1257 assert is_number(np.float64(1.1)) 1258 assert is_number(np.complex128(1 + 3j)) 1259 assert is_number(np.nan) 1260 1261 assert not is_number(None) 1262 assert not is_number("x") 1263 assert not is_number(datetime(2011, 1, 1)) 1264 assert not is_number(np.datetime64("2011-01-01")) 1265 assert not is_number(Timestamp("2011-01-01")) 1266 assert not is_number(Timestamp("2011-01-01", tz="US/Eastern")) 1267 assert not is_number(timedelta(1000)) 1268 assert not is_number(Timedelta("1 days")) 1269 1270 # questionable 1271 assert not is_number(np.bool_(False)) 1272 assert is_number(np.timedelta64(1, "D")) 1273 1274 def test_is_bool(self): 1275 assert is_bool(True) 1276 assert is_bool(False) 1277 assert is_bool(np.bool_(False)) 1278 1279 assert not is_bool(1) 1280 assert not is_bool(1.1) 1281 assert not is_bool(1 + 3j) 1282 assert not is_bool(np.int64(1)) 1283 assert not is_bool(np.float64(1.1)) 1284 assert not is_bool(np.complex128(1 + 3j)) 1285 assert not is_bool(np.nan) 1286 assert not is_bool(None) 1287 assert not is_bool("x") 1288 assert not is_bool(datetime(2011, 1, 1)) 1289 assert not is_bool(np.datetime64("2011-01-01")) 1290 assert not is_bool(Timestamp("2011-01-01")) 1291 assert not is_bool(Timestamp("2011-01-01", tz="US/Eastern")) 1292 assert not is_bool(timedelta(1000)) 1293 assert not is_bool(np.timedelta64(1, "D")) 1294 assert not is_bool(Timedelta("1 days")) 1295 1296 def test_is_integer(self): 1297 assert is_integer(1) 1298 assert is_integer(np.int64(1)) 1299 1300 assert not is_integer(True) 1301 assert not is_integer(1.1) 1302 assert not is_integer(1 + 3j) 1303 assert not is_integer(False) 1304 assert not is_integer(np.bool_(False)) 1305 assert not is_integer(np.float64(1.1)) 1306 assert not is_integer(np.complex128(1 + 3j)) 1307 assert not is_integer(np.nan) 1308 assert not is_integer(None) 1309 assert not is_integer("x") 1310 assert not is_integer(datetime(2011, 1, 1)) 1311 assert not is_integer(np.datetime64("2011-01-01")) 1312 assert not is_integer(Timestamp("2011-01-01")) 1313 assert not is_integer(Timestamp("2011-01-01", tz="US/Eastern")) 1314 assert not is_integer(timedelta(1000)) 1315 assert not is_integer(Timedelta("1 days")) 1316 assert not is_integer(np.timedelta64(1, "D")) 1317 1318 def test_is_float(self): 1319 assert is_float(1.1) 1320 assert is_float(np.float64(1.1)) 1321 assert is_float(np.nan) 1322 1323 assert not is_float(True) 1324 assert not is_float(1) 1325 assert not is_float(1 + 3j) 1326 assert not is_float(False) 1327 assert not is_float(np.bool_(False)) 1328 assert not is_float(np.int64(1)) 1329 assert not is_float(np.complex128(1 + 3j)) 1330 assert not is_float(None) 1331 assert not is_float("x") 1332 assert not is_float(datetime(2011, 1, 1)) 1333 assert not is_float(np.datetime64("2011-01-01")) 1334 assert not is_float(Timestamp("2011-01-01")) 1335 assert not is_float(Timestamp("2011-01-01", tz="US/Eastern")) 1336 assert not is_float(timedelta(1000)) 1337 assert not is_float(np.timedelta64(1, "D")) 1338 assert not is_float(Timedelta("1 days")) 1339 1340 def test_is_datetime_dtypes(self): 1341 1342 ts = pd.date_range("20130101", periods=3) 1343 tsa = pd.date_range("20130101", periods=3, tz="US/Eastern") 1344 1345 assert is_datetime64_dtype("datetime64") 1346 assert is_datetime64_dtype("datetime64[ns]") 1347 assert is_datetime64_dtype(ts) 1348 assert not is_datetime64_dtype(tsa) 1349 1350 assert not is_datetime64_ns_dtype("datetime64") 1351 assert is_datetime64_ns_dtype("datetime64[ns]") 1352 assert is_datetime64_ns_dtype(ts) 1353 assert is_datetime64_ns_dtype(tsa) 1354 1355 assert is_datetime64_any_dtype("datetime64") 1356 assert is_datetime64_any_dtype("datetime64[ns]") 1357 assert is_datetime64_any_dtype(ts) 1358 assert is_datetime64_any_dtype(tsa) 1359 1360 assert not is_datetime64tz_dtype("datetime64") 1361 assert not is_datetime64tz_dtype("datetime64[ns]") 1362 assert not is_datetime64tz_dtype(ts) 1363 assert is_datetime64tz_dtype(tsa) 1364 1365 for tz in ["US/Eastern", "UTC"]: 1366 dtype = f"datetime64[ns, {tz}]" 1367 assert not is_datetime64_dtype(dtype) 1368 assert is_datetime64tz_dtype(dtype) 1369 assert is_datetime64_ns_dtype(dtype) 1370 assert is_datetime64_any_dtype(dtype) 1371 1372 def test_is_timedelta(self): 1373 assert is_timedelta64_dtype("timedelta64") 1374 assert is_timedelta64_dtype("timedelta64[ns]") 1375 assert not is_timedelta64_ns_dtype("timedelta64") 1376 assert is_timedelta64_ns_dtype("timedelta64[ns]") 1377 1378 tdi = TimedeltaIndex([1e14, 2e14], dtype="timedelta64[ns]") 1379 assert is_timedelta64_dtype(tdi) 1380 assert is_timedelta64_ns_dtype(tdi) 1381 assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) 1382 1383 # Conversion to Int64Index: 1384 assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64")) 1385 assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]")) 1386 1387 1388class TestIsScalar: 1389 def test_is_scalar_builtin_scalars(self): 1390 assert is_scalar(None) 1391 assert is_scalar(True) 1392 assert is_scalar(False) 1393 assert is_scalar(Fraction()) 1394 assert is_scalar(0.0) 1395 assert is_scalar(1) 1396 assert is_scalar(complex(2)) 1397 assert is_scalar(float("NaN")) 1398 assert is_scalar(np.nan) 1399 assert is_scalar("foobar") 1400 assert is_scalar(b"foobar") 1401 assert is_scalar(datetime(2014, 1, 1)) 1402 assert is_scalar(date(2014, 1, 1)) 1403 assert is_scalar(time(12, 0)) 1404 assert is_scalar(timedelta(hours=1)) 1405 assert is_scalar(pd.NaT) 1406 assert is_scalar(pd.NA) 1407 1408 def test_is_scalar_builtin_nonscalars(self): 1409 assert not is_scalar({}) 1410 assert not is_scalar([]) 1411 assert not is_scalar([1]) 1412 assert not is_scalar(()) 1413 assert not is_scalar((1,)) 1414 assert not is_scalar(slice(None)) 1415 assert not is_scalar(Ellipsis) 1416 1417 def test_is_scalar_numpy_array_scalars(self): 1418 assert is_scalar(np.int64(1)) 1419 assert is_scalar(np.float64(1.0)) 1420 assert is_scalar(np.int32(1)) 1421 assert is_scalar(np.complex64(2)) 1422 assert is_scalar(np.object_("foobar")) 1423 assert is_scalar(np.str_("foobar")) 1424 assert is_scalar(np.unicode_("foobar")) 1425 assert is_scalar(np.bytes_(b"foobar")) 1426 assert is_scalar(np.datetime64("2014-01-01")) 1427 assert is_scalar(np.timedelta64(1, "h")) 1428 1429 def test_is_scalar_numpy_zerodim_arrays(self): 1430 for zerodim in [ 1431 np.array(1), 1432 np.array("foobar"), 1433 np.array(np.datetime64("2014-01-01")), 1434 np.array(np.timedelta64(1, "h")), 1435 np.array(np.datetime64("NaT")), 1436 ]: 1437 assert not is_scalar(zerodim) 1438 assert is_scalar(lib.item_from_zerodim(zerodim)) 1439 1440 @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") 1441 def test_is_scalar_numpy_arrays(self): 1442 assert not is_scalar(np.array([])) 1443 assert not is_scalar(np.array([[]])) 1444 assert not is_scalar(np.matrix("1; 2")) 1445 1446 def test_is_scalar_pandas_scalars(self): 1447 assert is_scalar(Timestamp("2014-01-01")) 1448 assert is_scalar(Timedelta(hours=1)) 1449 assert is_scalar(Period("2014-01-01")) 1450 assert is_scalar(Interval(left=0, right=1)) 1451 assert is_scalar(DateOffset(days=1)) 1452 assert is_scalar(pd.offsets.Minute(3)) 1453 1454 def test_is_scalar_pandas_containers(self): 1455 assert not is_scalar(Series(dtype=object)) 1456 assert not is_scalar(Series([1])) 1457 assert not is_scalar(DataFrame()) 1458 assert not is_scalar(DataFrame([[1]])) 1459 assert not is_scalar(Index([])) 1460 assert not is_scalar(Index([1])) 1461 assert not is_scalar(Categorical([])) 1462 assert not is_scalar(DatetimeIndex([])._data) 1463 assert not is_scalar(TimedeltaIndex([])._data) 1464 assert not is_scalar(DatetimeIndex([])._data.to_period("D")) 1465 assert not is_scalar(pd.array([1, 2, 3])) 1466 1467 def test_is_scalar_number(self): 1468 # Number() is not recognied by PyNumber_Check, so by extension 1469 # is not recognized by is_scalar, but instances of non-abstract 1470 # subclasses are. 1471 1472 class Numeric(Number): 1473 def __init__(self, value): 1474 self.value = value 1475 1476 def __int__(self): 1477 return self.value 1478 1479 num = Numeric(1) 1480 assert is_scalar(num) 1481 1482 1483def test_datetimeindex_from_empty_datetime64_array(): 1484 for unit in ["ms", "us", "ns"]: 1485 idx = DatetimeIndex(np.array([], dtype=f"datetime64[{unit}]")) 1486 assert len(idx) == 0 1487 1488 1489def test_nan_to_nat_conversions(): 1490 1491 df = DataFrame( 1492 {"A": np.asarray(range(10), dtype="float64"), "B": Timestamp("20010101")} 1493 ) 1494 df.iloc[3:6, :] = np.nan 1495 result = df.loc[4, "B"] 1496 assert result is pd.NaT 1497 1498 s = df["B"].copy() 1499 s[8:9] = np.nan 1500 assert s[8] is pd.NaT 1501 1502 1503@td.skip_if_no_scipy 1504@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") 1505def test_is_scipy_sparse(spmatrix): 1506 assert is_scipy_sparse(spmatrix([[0, 1]])) 1507 assert not is_scipy_sparse(np.array([1])) 1508 1509 1510def test_ensure_int32(): 1511 values = np.arange(10, dtype=np.int32) 1512 result = ensure_int32(values) 1513 assert result.dtype == np.int32 1514 1515 values = np.arange(10, dtype=np.int64) 1516 result = ensure_int32(values) 1517 assert result.dtype == np.int32 1518