1from datetime import datetime, timedelta
2from importlib import reload
3import string
4import sys
5
6import numpy as np
7import pytest
8
9from pandas._libs.tslibs import iNaT
10
11from pandas import (
12    NA,
13    Categorical,
14    CategoricalDtype,
15    Index,
16    Interval,
17    NaT,
18    Series,
19    Timedelta,
20    Timestamp,
21    cut,
22    date_range,
23)
24import pandas._testing as tm
25
26
class TestAstypeAPI:
    """Checks on the ``Series.astype`` call signature.

    Covers validation of the ``errors`` keyword and dict-like ``dtype``
    arguments (``dict`` or ``Series``) keyed by the Series name.
    """

    def test_arg_for_errors_in_astype(self):
        # see GH#14878
        values = Series([1, 2, 3])

        expected_message = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        # anything other than "raise"/"ignore" must be rejected
        with pytest.raises(ValueError, match=expected_message):
            values.astype(np.float64, errors=False)

        # a legal value for ``errors`` is accepted without raising
        values.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        # see GH#7271
        original = Series(range(0, 10, 2), name="abc")

        # Valid mappings: the single key is the Series name.
        as_str = original.astype(dtype_class({"abc": str}))
        tm.assert_series_equal(as_str, Series(["0", "2", "4", "6", "8"], name="abc"))

        as_float = original.astype(dtype_class({"abc": "float64"}))
        tm.assert_series_equal(
            as_float,
            Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc"),
        )

        # Invalid mappings: an extra key, a key that is not the Series name,
        # and (GH#16717) an empty mapping. An empty Series mapping is built
        # with an explicit object dtype, as the mapping holds dtype objects.
        empty_kwargs = {"dtype": object} if dtype_class is Series else {}
        invalid_mappings = [
            dtype_class({"abc": str, "def": str}),
            dtype_class({0: str}),
            dtype_class({}, **empty_kwargs),
        ]

        key_error_message = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )
        for mapping in invalid_mappings:
            with pytest.raises(KeyError, match=key_error_message):
                original.astype(mapping)
77
78
class TestAstype:
    """Tests for ``Series.astype`` on concrete target dtypes.

    Covers casts to and from string, datetime-like (dt64, dt64tz, td64,
    period), numeric, bytes and extension dtypes, including the error paths
    for invalid or lossy conversions.
    """

    @pytest.mark.parametrize("dtype", np.typecodes["All"])
    def test_astype_empty_constructor_equality(self, dtype):
        # see GH#15524

        if dtype not in (
            "S",
            "V",  # poor support (if any) currently
            "M",
            "m",  # Generic timestamps raise a ValueError. Already tested.
        ):
            # Constructing an empty Series with the dtype must match casting
            # an empty Series to that dtype.
            init_empty = Series([], dtype=dtype)
            with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
                as_type_empty = Series([]).astype(dtype)
            tm.assert_series_equal(init_empty, as_type_empty)

    @pytest.mark.parametrize("dtype", [str, np.str_])
    @pytest.mark.parametrize(
        "series",
        [
            Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
        ],
    )
    def test_astype_str_map(self, dtype, series):
        # see GH#4405
        # astype to a string dtype must agree with element-wise ``str``
        result = series.astype(dtype)
        expected = series.map(str)
        tm.assert_series_equal(result, expected)

    def test_astype_float_to_period(self):
        result = Series([np.nan]).astype("period[D]")
        expected = Series([NaT], dtype="period[D]")
        tm.assert_series_equal(result, expected)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = Series([1, 2], dtype="int64")
        # Don't have PandasDtype in the public API, so we use `.array.dtype`,
        # which is a PandasDtype.
        result = ser.astype(ser.array.dtype)
        tm.assert_series_equal(result, ser)

    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
    def test_astype_generic_timestamp_no_frequency(self, dtype, request):
        # see GH#15524, GH#15987
        data = [1]
        s = Series(data)

        if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
            mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
            request.node.add_marker(mark)

        msg = (
            fr"The '{dtype.__name__}' dtype has no unit\. "
            fr"Please pass in '{dtype.__name__}\[ns\]' instead."
        )
        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    def test_astype_dt64_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti = date_range("2012-01-01", periods=3)
        result = Series(dti).astype(str)
        expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
        tm.assert_series_equal(result, expected)

    def test_astype_dt64tz_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
        result = Series(dti_tz).astype(str)
        expected = Series(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            dtype=object,
        )
        tm.assert_series_equal(result, expected)

    def test_astype_datetime(self):
        # all-NaT datetime64 Series
        s = Series(iNaT, dtype="M8[ns]", index=range(5))

        s = s.astype("O")
        assert s.dtype == np.object_

        # single datetime value
        s = Series([datetime(2001, 1, 2, 0, 0)])

        s = s.astype("O")
        assert s.dtype == np.object_

        # datetimes with a missing value set afterwards
        s = Series([datetime(2001, 1, 2, 0, 0) for _ in range(3)])

        s[1] = np.nan
        assert s.dtype == "M8[ns]"

        s = s.astype("O")
        assert s.dtype == np.object_

    def test_astype_datetime64tz(self):
        s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

        # astype
        result = s.astype(object)
        expected = Series(s.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        # round-trip the underlying values through UTC back to the original tz
        result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
        tm.assert_series_equal(result, s)

        # astype - object, preserves on construction
        result = Series(s.astype(object))
        expected = s.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype("datetime64[ns, US/Eastern]")
        tm.assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        tm.assert_series_equal(result, s)

        result = s.astype("datetime64[ns, CET]")
        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
        tm.assert_series_equal(result, expected)

    def test_astype_str_cast_dt64(self):
        # see GH#9757
        ts = Series([Timestamp("2010-01-04 00:00:00")])
        s = ts.astype(str)

        expected = Series(["2010-01-04"])
        tm.assert_series_equal(s, expected)

        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
        s = ts.astype(str)

        expected = Series(["2010-01-04 00:00:00-05:00"])
        tm.assert_series_equal(s, expected)

    def test_astype_str_cast_td64(self):
        # see GH#9757

        td = Series([Timedelta(1, unit="d")])
        ser = td.astype(str)

        expected = Series(["1 days"])
        tm.assert_series_equal(ser, expected)

    def test_dt64_series_astype_object(self):
        dt64ser = Series(date_range("20130101", periods=3))
        result = dt64ser.astype(object)
        assert isinstance(result.iloc[0], datetime)
        assert result.dtype == np.object_

    def test_td64_series_astype_object(self):
        tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
        result = tdser.astype(object)
        assert isinstance(result.iloc[0], timedelta)
        assert result.dtype == np.object_

    @pytest.mark.parametrize(
        "values",
        [
            Series(["x", "y", "z"], dtype="string"),
            Series(["x", "y", "z"], dtype="category"),
            Series(3 * [Timestamp("2020-01-01", tz="UTC")]),
            Series(3 * [Interval(0, 1)]),
        ],
    )
    @pytest.mark.parametrize("errors", ["raise", "ignore"])
    def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
        # https://github.com/pandas-dev/pandas/issues/35471
        if errors == "ignore":
            # the failed cast is swallowed and the input comes back unchanged
            expected = values
            result = values.astype(float, errors="ignore")
            tm.assert_series_equal(result, expected)
        else:
            msg = "(Cannot cast)|(could not convert)"
            with pytest.raises((ValueError, TypeError), match=msg):
                values.astype(float, errors=errors)

    @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
    def test_astype_from_float_to_str(self, dtype):
        # https://github.com/pandas-dev/pandas/issues/36451
        s = Series([0.1], dtype=dtype)
        result = s.astype(str)
        expected = Series(["0.1"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "value, string_value",
        [
            (None, "None"),
            (np.nan, "nan"),
            (NA, "<NA>"),
        ],
    )
    def test_astype_to_str_preserves_na(self, value, string_value):
        # https://github.com/pandas-dev/pandas/issues/36904
        s = Series(["a", "b", value], dtype=object)
        result = s.astype(str)
        expected = Series(["a", "b", string_value], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
    def test_astype(self, dtype):
        s = Series(np.random.randn(5), name="foo")
        as_typed = s.astype(dtype)

        # both the requested dtype and the original name must be carried over
        assert as_typed.dtype == dtype
        assert as_typed.name == s.name

    @pytest.mark.parametrize("value", [np.nan, np.inf])
    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
    def test_astype_cast_nan_inf_int(self, dtype, value):
        # gh-14265: check NaN and inf raise error when converting to int
        msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
        s = Series([value])

        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    @pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
    def test_astype_cast_object_int_fail(self, dtype):
        arr = Series(["car", "house", "tree", "1"])
        msg = r"invalid literal for int\(\) with base 10: 'car'"
        with pytest.raises(ValueError, match=msg):
            arr.astype(dtype)

    def test_astype_cast_object_int(self):
        arr = Series(["1", "2", "3", "4"], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))

    def test_astype_unicode(self):
        # see GH#7758
        digits = string.digits
        test_series = [
            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series(["データーサイエンス、お前はもう死んでいる"]),
        ]

        # The bytes case is only added when the interpreter's default
        # encoding is utf-8.
        if sys.getdefaultencoding() == "utf-8":
            test_series.append(Series(["野菜食べないとやばい".encode()]))

        for s in test_series:
            res = s.astype("unicode")
            expec = s.map(str)
            tm.assert_series_equal(res, expec)

    def test_astype_bytes(self):
        # GH#39474
        result = Series(["foo", "bar", "baz"]).astype(bytes)
        assert result.dtypes == np.dtype("S3")
344
345
class TestAstypeCategorical:
    """Tests for ``Series.astype`` to and between categorical dtypes."""

    def test_astype_categorical_invalid_conversions(self):
        # invalid conversion (these are NOT a dtype)
        labels = Categorical(
            [f"{start} - {start + 499}" for start in range(0, 10000, 500)]
        )
        raw = Series(np.random.randint(0, 10000, 100)).sort_values()
        binned = cut(raw, range(0, 10500, 500), right=False, labels=labels)

        msg = (
            "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
            "not understood"
        )
        # Deferred so that each full conversion chain runs inside the
        # ``pytest.raises`` context.
        attempts = (
            lambda: binned.astype(Categorical),
            lambda: binned.astype("object").astype(Categorical),
        )
        for attempt in attempts:
            with pytest.raises(TypeError, match=msg):
                attempt()

    def test_astype_categoricaldtype(self):
        ser = Series(["a", "b", "a"])

        # categories matching the data, both ordered and unordered
        for ordered in (True, False):
            converted = ser.astype(CategoricalDtype(["a", "b"], ordered=ordered))
            wanted = Series(Categorical(["a", "b", "a"], ordered=ordered))
            tm.assert_series_equal(converted, wanted)

        # a category that does not occur in the data is still kept
        wide_dtype = CategoricalDtype(["a", "b", "c"], ordered=False)
        converted = ser.astype(wide_dtype)
        wanted = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(converted, wanted)
        tm.assert_index_equal(converted.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        # GH#10696, GH#18593
        data = list("abcaacbab")
        source_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        source = Series(data, dtype=source_dtype, name=name)

        # unspecified categories
        target_dtype = CategoricalDtype(ordered=dtype_ordered)
        converted = source.astype(target_dtype)
        wanted_dtype = CategoricalDtype(source_dtype.categories, dtype_ordered)
        tm.assert_series_equal(
            converted, Series(data, name=name, dtype=wanted_dtype)
        )

        # different categories
        target_dtype = CategoricalDtype(list("adc"), dtype_ordered)
        converted = source.astype(target_dtype)
        tm.assert_series_equal(
            converted, Series(data, name=name, dtype=target_dtype)
        )

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            converted = source.astype("category")
            tm.assert_series_equal(converted, source)

    def test_astype_bool_missing_to_categorical(self):
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        bool_categories = CategoricalDtype(categories=[True, False])
        converted = ser.astype(bool_categories)
        wanted = Series(Categorical([True, False, np.nan], categories=[True, False]))
        tm.assert_series_equal(converted, wanted)

    def test_astype_categories_raises(self):
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)
423