1import numpy as np 2import pytest 3 4from pandas.core.dtypes.dtypes import CategoricalDtype 5 6import pandas as pd 7from pandas import Categorical, DataFrame, Series 8import pandas._testing as tm 9 10 11class TestSeriesDtypes: 12 def test_dtype(self, datetime_series): 13 14 assert datetime_series.dtype == np.dtype("float64") 15 assert datetime_series.dtypes == np.dtype("float64") 16 17 def test_astype_from_categorical(self): 18 items = ["a", "b", "c", "a"] 19 s = Series(items) 20 exp = Series(Categorical(items)) 21 res = s.astype("category") 22 tm.assert_series_equal(res, exp) 23 24 items = [1, 2, 3, 1] 25 s = Series(items) 26 exp = Series(Categorical(items)) 27 res = s.astype("category") 28 tm.assert_series_equal(res, exp) 29 30 df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]}) 31 cats = Categorical([1, 2, 3, 4, 5, 6]) 32 exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) 33 df["cats"] = df["cats"].astype("category") 34 tm.assert_frame_equal(exp_df, df) 35 36 df = DataFrame( 37 {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]} 38 ) 39 cats = Categorical(["a", "b", "b", "a", "a", "d"]) 40 exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) 41 df["cats"] = df["cats"].astype("category") 42 tm.assert_frame_equal(exp_df, df) 43 44 # with keywords 45 lst = ["a", "b", "c", "a"] 46 s = Series(lst) 47 exp = Series(Categorical(lst, ordered=True)) 48 res = s.astype(CategoricalDtype(None, ordered=True)) 49 tm.assert_series_equal(res, exp) 50 51 exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True)) 52 res = s.astype(CategoricalDtype(list("abcdef"), ordered=True)) 53 tm.assert_series_equal(res, exp) 54 55 def test_astype_categorical_to_other(self): 56 cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) 57 ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() 58 ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) 59 60 expected = ser 61 tm.assert_series_equal(ser.astype("category"), expected) 62 tm.assert_series_equal(ser.astype(CategoricalDtype()), expected) 63 msg = r"Cannot cast object dtype to float64" 64 with pytest.raises(ValueError, match=msg): 65 ser.astype("float64") 66 67 cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) 68 exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) 69 tm.assert_series_equal(cat.astype("str"), exp) 70 s2 = Series(Categorical(["1", "2", "3", "4"])) 71 exp2 = Series([1, 2, 3, 4]).astype("int") 72 tm.assert_series_equal(s2.astype("int"), exp2) 73 74 # object don't sort correctly, so just compare that we have the same 75 # values 76 def cmp(a, b): 77 tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) 78 79 expected = Series(np.array(ser.values), name="value_group") 80 cmp(ser.astype("object"), expected) 81 cmp(ser.astype(np.object_), expected) 82 83 # array conversion 84 tm.assert_almost_equal(np.array(ser), np.array(ser.values)) 85 86 tm.assert_series_equal(ser.astype("category"), ser) 87 tm.assert_series_equal(ser.astype(CategoricalDtype()), ser) 88 89 roundtrip_expected = ser.cat.set_categories( 90 ser.cat.categories.sort_values() 91 ).cat.remove_unused_categories() 92 result = ser.astype("object").astype("category") 93 tm.assert_series_equal(result, roundtrip_expected) 94 result = ser.astype("object").astype(CategoricalDtype()) 95 tm.assert_series_equal(result, roundtrip_expected) 96 97 def test_astype_categorical_invalid_conversions(self): 98 # invalid conversion (these are NOT a dtype) 99 cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) 100 ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() 101 ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) 102 103 msg = ( 104 "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' " 105 "not understood" 106 ) 107 with pytest.raises(TypeError, match=msg): 108 ser.astype(Categorical) 109 with pytest.raises(TypeError, match=msg): 110 ser.astype("object").astype(Categorical) 111 112 def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype): 113 # GH 39402 114 115 df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])}) 116 df.col1 = df.col1.astype("category") 117 df.col1 = df.col1.astype(any_int_or_nullable_int_dtype) 118 expected = DataFrame( 119 {"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)} 120 ) 121 tm.assert_frame_equal(df, expected) 122 123 def test_series_to_categorical(self): 124 # see gh-16524: test conversion of Series to Categorical 125 series = Series(["a", "b", "c"]) 126 127 result = Series(series, dtype="category") 128 expected = Series(["a", "b", "c"], dtype="category") 129 130 tm.assert_series_equal(result, expected) 131 132 def test_reindex_astype_order_consistency(self): 133 # GH 17444 134 s = Series([1, 2, 3], index=[2, 0, 1]) 135 new_index = [0, 1, 2] 136 temp_dtype = "category" 137 new_dtype = str 138 s1 = s.reindex(new_index).astype(temp_dtype).astype(new_dtype) 139 s2 = s.astype(temp_dtype).reindex(new_index).astype(new_dtype) 140 tm.assert_series_equal(s1, s2) 141