1import numpy as np
2import pytest
3
4from pandas.core.dtypes.dtypes import CategoricalDtype
5
6import pandas as pd
7from pandas import Categorical, DataFrame, Series
8import pandas._testing as tm
9
10
11class TestSeriesDtypes:
12    def test_dtype(self, datetime_series):
13
14        assert datetime_series.dtype == np.dtype("float64")
15        assert datetime_series.dtypes == np.dtype("float64")
16
17    def test_astype_from_categorical(self):
18        items = ["a", "b", "c", "a"]
19        s = Series(items)
20        exp = Series(Categorical(items))
21        res = s.astype("category")
22        tm.assert_series_equal(res, exp)
23
24        items = [1, 2, 3, 1]
25        s = Series(items)
26        exp = Series(Categorical(items))
27        res = s.astype("category")
28        tm.assert_series_equal(res, exp)
29
30        df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]})
31        cats = Categorical([1, 2, 3, 4, 5, 6])
32        exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
33        df["cats"] = df["cats"].astype("category")
34        tm.assert_frame_equal(exp_df, df)
35
36        df = DataFrame(
37            {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]}
38        )
39        cats = Categorical(["a", "b", "b", "a", "a", "d"])
40        exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
41        df["cats"] = df["cats"].astype("category")
42        tm.assert_frame_equal(exp_df, df)
43
44        # with keywords
45        lst = ["a", "b", "c", "a"]
46        s = Series(lst)
47        exp = Series(Categorical(lst, ordered=True))
48        res = s.astype(CategoricalDtype(None, ordered=True))
49        tm.assert_series_equal(res, exp)
50
51        exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
52        res = s.astype(CategoricalDtype(list("abcdef"), ordered=True))
53        tm.assert_series_equal(res, exp)
54
55    def test_astype_categorical_to_other(self):
56        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
57        ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values()
58        ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat)
59
60        expected = ser
61        tm.assert_series_equal(ser.astype("category"), expected)
62        tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
63        msg = r"Cannot cast object dtype to float64"
64        with pytest.raises(ValueError, match=msg):
65            ser.astype("float64")
66
67        cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
68        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
69        tm.assert_series_equal(cat.astype("str"), exp)
70        s2 = Series(Categorical(["1", "2", "3", "4"]))
71        exp2 = Series([1, 2, 3, 4]).astype("int")
72        tm.assert_series_equal(s2.astype("int"), exp2)
73
74        # object don't sort correctly, so just compare that we have the same
75        # values
76        def cmp(a, b):
77            tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))
78
79        expected = Series(np.array(ser.values), name="value_group")
80        cmp(ser.astype("object"), expected)
81        cmp(ser.astype(np.object_), expected)
82
83        # array conversion
84        tm.assert_almost_equal(np.array(ser), np.array(ser.values))
85
86        tm.assert_series_equal(ser.astype("category"), ser)
87        tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)
88
89        roundtrip_expected = ser.cat.set_categories(
90            ser.cat.categories.sort_values()
91        ).cat.remove_unused_categories()
92        result = ser.astype("object").astype("category")
93        tm.assert_series_equal(result, roundtrip_expected)
94        result = ser.astype("object").astype(CategoricalDtype())
95        tm.assert_series_equal(result, roundtrip_expected)
96
97    def test_astype_categorical_invalid_conversions(self):
98        # invalid conversion (these are NOT a dtype)
99        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
100        ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values()
101        ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat)
102
103        msg = (
104            "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
105            "not understood"
106        )
107        with pytest.raises(TypeError, match=msg):
108            ser.astype(Categorical)
109        with pytest.raises(TypeError, match=msg):
110            ser.astype("object").astype(Categorical)
111
112    def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype):
113        # GH 39402
114
115        df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])})
116        df.col1 = df.col1.astype("category")
117        df.col1 = df.col1.astype(any_int_or_nullable_int_dtype)
118        expected = DataFrame(
119            {"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)}
120        )
121        tm.assert_frame_equal(df, expected)
122
123    def test_series_to_categorical(self):
124        # see gh-16524: test conversion of Series to Categorical
125        series = Series(["a", "b", "c"])
126
127        result = Series(series, dtype="category")
128        expected = Series(["a", "b", "c"], dtype="category")
129
130        tm.assert_series_equal(result, expected)
131
132    def test_reindex_astype_order_consistency(self):
133        # GH 17444
134        s = Series([1, 2, 3], index=[2, 0, 1])
135        new_index = [0, 1, 2]
136        temp_dtype = "category"
137        new_dtype = str
138        s1 = s.reindex(new_index).astype(temp_dtype).astype(new_dtype)
139        s2 = s.astype(temp_dtype).reindex(new_index).astype(new_dtype)
140        tm.assert_series_equal(s1, s2)
141