from datetime import timedelta

import numpy as np

from pandas._libs import iNaT

import pandas as pd
from pandas import Categorical, Index, NaT, Series, isna
import pandas._testing as tm


class TestSeriesMissingData:
    """Tests for how Series represents and detects missing values."""

    def test_categorical_nan_handling(self):
        """NaN in a Categorical is not a category; its code is -1."""
        # NaNs are represented as -1 in labels
        ser = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(
            ser.values.codes, np.array([0, 1, -1, 0], dtype=np.int8)
        )

    def test_isna_for_inf(self):
        """With ``mode.use_inf_as_na`` set, inf is reported and dropped as NA."""
        ser = Series(["a", np.inf, np.nan, pd.NA, 1.0])
        # Only the isna/dropna calls run under the option; the expected
        # objects and the comparisons are evaluated with default settings.
        with pd.option_context("mode.use_inf_as_na", True):
            result = ser.isna()
            dropped = ser.dropna()
        expected = Series([False, True, True, True, False])
        expected_dropped = Series(["a", 1.0], index=[0, 4])
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(dropped, expected_dropped)

    def test_isnull_for_inf_deprecated(self):
        """Same check as above through the ``mode.use_inf_as_null`` option key."""
        # gh-17115
        ser = Series(["a", np.inf, np.nan, 1.0])
        with pd.option_context("mode.use_inf_as_null", True):
            result = ser.isna()
            dropped = ser.dropna()

        expected = Series([False, True, True, False])
        expected_dropped = Series(["a", 1.0], index=[0, 3])
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(dropped, expected_dropped)

    def test_timedelta64_nan(self):
        """Setting NaN / NaT / iNaT into a timedelta Series, then restoring."""
        td = Series([timedelta(days=i) for i in range(10)])

        # nan ops on timedeltas
        td1 = td.copy()
        td1[0] = np.nan
        assert isna(td1[0])
        assert td1[0].value == iNaT
        td1[0] = td[0]
        assert not isna(td1[0])

        # GH#16674 iNaT is treated as an integer when given by the user
        td1[1] = iNaT
        assert not isna(td1[1])
        assert td1.dtype == np.object_
        assert td1[1] == iNaT
        td1[1] = td[1]
        assert not isna(td1[1])

        td1[2] = NaT
        assert isna(td1[2])
        assert td1[2].value == iNaT
        td1[2] = td[2]
        assert not isna(td1[2])

        # TODO: boolean-mask assignment of NaN over a timedelta range
        # (e.g. values between 3 and 7 days) is not covered here. An earlier
        # commented-out attempt was noted as not working, with doubt about
        # whether numpy supports it, and has been removed.

    # NOTE: a chained comparison such as ``-0.5 <= datetime_series <= 0.5`` is
    # not tested. The previously commented-out ``test_logical_range_select``
    # was marked as a NumPy limitation and has been removed.

    def test_valid(self, datetime_series):
        """dropna() keeps exactly the non-NaN entries.

        ``datetime_series`` is not defined in this file; presumably a pytest
        fixture supplied by the surrounding test suite -- confirm in conftest.
        """
        ts = datetime_series.copy()
        # NOTE(review): the index freq is cleared up front, presumably so the
        # sliced/filtered results below compare equal -- confirm.
        ts.index = ts.index._with_freq(None)
        # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
        ts[::2] = np.nan

        result = ts.dropna()
        assert len(result) == ts.count()
        tm.assert_series_equal(result, ts[1::2])
        tm.assert_series_equal(result, ts[pd.notna(ts)])


def test_hasnans_uncached_for_series():
    """Index caches ``hasnans``; Series has no ``_cache`` and recomputes it."""
    # GH#19700
    idx = Index([0, 1])
    assert idx.hasnans is False
    assert "hasnans" in idx._cache
    ser = idx.to_series()
    assert ser.hasnans is False
    assert not hasattr(ser, "_cache")
    # After the data changes, hasnans must reflect the new value.
    ser.iloc[-1] = np.nan
    assert ser.hasnans is True
    assert Series.hasnans.__doc__ == Index.hasnans.__doc__