1import numpy as np
2import pytest
3
4from pandas._libs.tslibs import iNaT
5from pandas._libs.tslibs.period import IncompatibleFrequency
6import pandas.util._test_decorators as td
7
8from pandas.core.dtypes.base import registry
9from pandas.core.dtypes.dtypes import PeriodDtype
10
11import pandas as pd
12import pandas._testing as tm
13from pandas.core.arrays import PeriodArray, period_array
14
15# ----------------------------------------------------------------------------
16# Dtype
17
18
def test_registered():
    # PeriodDtype must be in the registry and be what the "Period[D]"
    # alias resolves to.
    assert PeriodDtype in registry.dtypes
    found = registry.find("Period[D]")
    assert found == PeriodDtype("D")
24
25
26# ----------------------------------------------------------------------------
27# period_array
28
29
@pytest.mark.parametrize(
    "data, freq, expected",
    [
        ([pd.Period("2017", "D")], None, [17167]),
        ([pd.Period("2017", "D")], "D", [17167]),
        ([2017], "D", [17167]),
        (["2017"], "D", [17167]),
        ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
        ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
        (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]),
        (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
        (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]),
    ],
)
def test_period_array_ok(data, freq, expected):
    # Compare the int64 ordinals of the constructed array against the
    # parametrized expectation.
    ordinals = period_array(data, freq=freq).asi8
    wanted = np.asarray(expected, dtype=np.int64)
    tm.assert_numpy_array_equal(ordinals, wanted)
48
49
def test_period_array_readonly_object():
    # https://github.com/pandas-dev/pandas/issues/25403
    # A read-only object ndarray of Periods must be accepted by
    # period_array, Series and DataFrame alike.
    periods = period_array([pd.Period("2019-01-01")])
    readonly = np.asarray(periods, dtype="object")
    readonly.setflags(write=False)

    tm.assert_period_array_equal(period_array(readonly), periods)

    tm.assert_series_equal(pd.Series(readonly), pd.Series(periods))

    tm.assert_frame_equal(
        pd.DataFrame({"A": readonly}),
        pd.DataFrame({"A": periods}),
    )
64
65
def test_from_datetime64_freq_changes():
    # https://github.com/pandas-dev/pandas/issues/23438
    # Daily datetimes converted with freq="M" are compared against a
    # monthly PeriodArray built from three "2017-01-01" strings.
    daily = pd.date_range("2017", periods=3, freq="D")
    monthly = PeriodArray._from_datetime64(daily, freq="M")
    wanted = period_array(["2017-01-01"] * 3, freq="M")
    tm.assert_period_array_equal(monthly, wanted)
72
73
@pytest.mark.parametrize(
    "data, freq, msg",
    [
        (
            [pd.Period("2017", "D"), pd.Period("2017", "A")],
            None,
            "Input has different freq",
        ),
        ([pd.Period("2017", "D")], "A", "Input has different freq"),
    ],
)
def test_period_array_raises(data, freq, msg):
    # Mixed or mismatching frequencies must be rejected by period_array.
    expect_incompatible = pytest.raises(IncompatibleFrequency, match=msg)
    with expect_incompatible:
        period_array(data, freq)
88
89
def test_period_array_non_period_series_raies():
    # Passing an integer Series to the PeriodArray constructor is a TypeError.
    int_series = pd.Series([1, 2, 3])
    with pytest.raises(TypeError, match="dtype"):
        PeriodArray(int_series, freq="D")
94
95
def test_period_array_freq_mismatch():
    # Re-wrapping a daily array with a monthly freq must fail, whether the
    # freq is given as a string alias or as an offset object.
    daily = period_array(["2000", "2001"], freq="D")
    for monthly_freq in ("M", pd.tseries.offsets.MonthEnd()):
        with pytest.raises(IncompatibleFrequency, match="freq"):
            PeriodArray(daily, freq=monthly_freq)
103
104
def test_asi8():
    # asi8 is compared against the expected ordinals, with iNaT in the
    # position of the None entry.
    daily = period_array(["2000", "2001", None], freq="D")
    wanted = np.array([10957, 11323, iNaT])
    tm.assert_numpy_array_equal(daily.asi8, wanted)
109
110
def test_take_raises():
    # take() with allow_fill=True must validate the fill value: first a
    # Period of another freq, then a plain string.
    daily = period_array(["2000", "2001"], freq="D")
    positions = [0, -1]

    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily.take(positions, allow_fill=True, fill_value=pd.Period("2000", freq="W"))

    msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
    with pytest.raises(TypeError, match=msg):
        daily.take(positions, allow_fill=True, fill_value="foo")
119
120
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype(dtype):
    # We choose to ignore the sign and size of integers for
    # Period/Datetime/Timedelta astype
    arr = period_array(["2000", "2001", None], freq="D")
    converted = arr.astype(dtype)

    # Unsigned requests are expected as uint64, everything else as int64.
    is_unsigned = np.dtype(dtype).kind == "u"
    expected_dtype = np.dtype("uint64" if is_unsigned else "int64")
    expected = arr.astype(expected_dtype)

    assert converted.dtype == expected_dtype
    tm.assert_numpy_array_equal(converted, expected)
136
137
138def test_astype_copies():
139    arr = period_array(["2000", "2001", None], freq="D")
140    result = arr.astype(np.int64, copy=False)
141    # Add the `.base`, since we now use `.asi8` which returns a view.
142    # We could maybe override it in PeriodArray to return ._data directly.
143    assert result.base is arr._data
144
145    result = arr.astype(np.int64, copy=True)
146    assert result is not arr._data
147    tm.assert_numpy_array_equal(result, arr._data.view("i8"))
148
149
def test_astype_categorical():
    # astype("category") is compared against a Categorical built from codes
    # [0, 1, 1, -1] over the two distinct periods.
    arr = period_array(["2000", "2001", "2001", None], freq="D")
    as_category = arr.astype("category")
    wanted = pd.Categorical.from_codes(
        [0, 1, 1, -1],
        categories=pd.PeriodIndex(["2000", "2001"], freq="D"),
    )
    tm.assert_categorical_equal(as_category, wanted)
156
157
def test_astype_period():
    # Casting a daily array to PeriodDtype("M") should match building the
    # same values directly with freq="M".
    values = ["2000", "2001", None]
    recast = period_array(values, freq="D").astype(PeriodDtype("M"))
    tm.assert_period_array_equal(recast, period_array(values, freq="M"))
163
164
@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"])
def test_astype_datetime(other):
    # Casting periods to datetime64/timedelta64 is a TypeError.
    arr = period_array(["2000", "2001", None], freq="D")
    # slice off the [ns] so that the regex matches.
    pattern = other[:-4]
    with pytest.raises(TypeError, match=pattern):
        arr.astype(other)
171
172
def test_fillna_raises():
    # Filling a length-3 array with a length-2 slice of itself must raise.
    full = period_array(["2000", "2001", "2002"], freq="D")
    too_short = full[:2]
    with pytest.raises(ValueError, match="Length"):
        full.fillna(too_short)
177
178
def test_fillna_copies():
    # fillna must hand back a new object, not the array it was called on.
    original = period_array(["2000", "2001", "2002"], freq="D")
    filled = original.fillna(pd.Period("2000", "D"))
    assert filled is not original
183
184
185# ----------------------------------------------------------------------------
186# setitem
187
188
@pytest.mark.parametrize(
    "key, value, expected",
    [
        ([0], pd.Period("2000", "D"), [10957, 1, 2]),
        ([0], None, [iNaT, 1, 2]),
        ([0], np.nan, [iNaT, 1, 2]),
        ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
        (
            [0, 1, 2],
            [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
            [10957, 11323, 11688],
        ),
    ],
)
def test_setitem(key, value, expected):
    # Start from ordinals 0..2, assign ``value`` at ``key`` and compare with
    # a PeriodArray built from the expected ordinals.
    target = PeriodArray(np.arange(3), freq="D")
    wanted = PeriodArray(expected, freq="D")
    target[key] = value
    tm.assert_period_array_equal(target, wanted)
208
209
def test_setitem_raises_incompatible_freq():
    # Assigning annual periods into a daily array must fail, both for a
    # scalar Period and for an array of periods.
    daily = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily[0] = pd.Period("2000", freq="A")

    annual = period_array(["2000", "2001"], freq="A")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily[[0, 1]] = annual
218
219
def test_setitem_raises_length():
    # Two positions cannot be filled from a one-element list.
    daily = PeriodArray(np.arange(3), freq="D")
    single = [pd.Period("2000", freq="D")]
    with pytest.raises(ValueError, match="length"):
        daily[[0, 1]] = single
224
225
def test_setitem_raises_type():
    # A bare integer is not an acceptable value for item assignment.
    daily = PeriodArray(np.arange(3), freq="D")
    expect_type_error = pytest.raises(TypeError, match="int")
    with expect_type_error:
        daily[0] = 1
230
231
232# ----------------------------------------------------------------------------
233# Ops
234
235
def test_sub_period():
    # Subtracting a monthly Period from a daily array must raise.
    daily = period_array(["2000", "2001"], freq="D")
    monthly_scalar = pd.Period("2000", freq="M")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily - monthly_scalar
241
242
243# ----------------------------------------------------------------------------
244# Methods
245
246
@pytest.mark.parametrize(
    "other",
    [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
)
def test_where_different_freq_raises(other):
    # Series.where on daily periods must reject an hourly replacement,
    # given either as a scalar or as an array.
    daily = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
    mask = np.array([True, False, True])
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily.where(mask, other)
256
257
258# ----------------------------------------------------------------------------
259# Printing
260
261
def test_repr_small():
    # str() of a two-element array is compared against the exact text.
    expected = (
        "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
    )
    rendered = str(period_array(["2000", "2001"], freq="D"))
    assert rendered == expected
269
270
def test_repr_large():
    # str() of a 1000-element array is compared against the exact text,
    # which shows the leading and trailing values around a " ..." line.
    expected = (
        "<PeriodArray>\n"
        "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01',\n"
        " ...\n"
        " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01']\n"
        "Length: 1000, dtype: period[D]"
    )
    rendered = str(period_array(["2000", "2001"] * 500, freq="D"))
    assert rendered == expected
288
289
290# ----------------------------------------------------------------------------
291# Reductions
292
293
class TestReductions:
    # min/max reductions on PeriodArray.

    def test_min_max(self):
        # Unsorted daily values with one "NaT" entry in the middle.
        raw = [
            "2000-01-03",
            "2000-01-03",
            "NaT",
            "2000-01-02",
            "2000-01-05",
            "2000-01-04",
        ]
        arr = period_array(raw, freq="D")

        # Default call: compared against the smallest / largest valid period.
        assert arr.min() == pd.Period("2000-01-02", freq="D")
        assert arr.max() == pd.Period("2000-01-05", freq="D")

        # With skipna=False the result must be NaT.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_empty(self, skipna):
        # An empty array reduces to NaT for either skipna setting.
        empty = period_array([], freq="D")

        lowest = empty.min(skipna=skipna)
        assert lowest is pd.NaT

        highest = empty.max(skipna=skipna)
        assert highest is pd.NaT
330
331
332# ----------------------------------------------------------------------------
333# Arrow interaction
334
# Decorator for the tests below, built by ``td.skip_if_no`` for the
# "pyarrow" package with min_version="0.15.1.dev".
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev")
336
337
@pyarrow_skip
def test_arrow_extension_type():
    # Equality and hashing of ArrowPeriodType are checked for two "D"
    # instances against each other and against an "M" instance.
    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    daily = ArrowPeriodType("D")
    daily_twin = ArrowPeriodType("D")
    monthly = ArrowPeriodType("M")

    assert daily.freq == "D"

    assert daily == daily_twin
    assert hash(daily) == hash(daily_twin)

    assert not daily == monthly
    assert hash(daily) != hash(monthly)
351
352
@pyarrow_skip
@pytest.mark.parametrize(
    "data, freq",
    [
        (pd.date_range("2017", periods=3), "D"),
        (pd.date_range("2017", periods=3, freq="A"), "A-DEC"),
    ],
)
def test_arrow_array(data, freq):
    # Converting a PeriodArray with pa.array: extension type by default,
    # int64 storage on request, TypeError for unsupported targets.
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    periods = period_array(data, freq=freq)
    storage = pa.array(periods.asi8, type="int64")

    # default conversion yields the extension type backed by int64 storage
    as_extension = pa.array(periods)
    assert isinstance(as_extension.type, ArrowPeriodType)
    assert as_extension.type.freq == freq
    assert as_extension.storage.equals(storage)

    # convert to its storage type
    as_int64 = pa.array(periods, type=pa.int64())
    assert as_int64.equals(storage)

    # unsupported conversions
    msg = "Not supported to convert PeriodArray to 'double' type"
    with pytest.raises(TypeError, match=msg):
        pa.array(periods, type="float64")

    with pytest.raises(TypeError, match="different 'freq'"):
        pa.array(periods, type=ArrowPeriodType("T"))
384
385
@pyarrow_skip
def test_arrow_array_missing():
    # After setting the middle element to NaT, the Arrow storage is expected
    # to equal [1, None, 3] as int64.
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    periods = PeriodArray([1, 2, 3], freq="D")
    periods[1] = pd.NaT

    converted = pa.array(periods)
    assert isinstance(converted.type, ArrowPeriodType)
    assert converted.type.freq == "D"

    wanted_storage = pa.array([1, None, 3], type="int64")
    assert converted.storage.equals(wanted_storage)
400
401
@pyarrow_skip
def test_arrow_table_roundtrip():
    # DataFrame -> pyarrow Table -> DataFrame must preserve the period
    # column, also after concatenating the table with itself.
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    periods = PeriodArray([1, 2, 3], freq="D")
    periods[1] = pd.NaT
    frame = pd.DataFrame({"a": periods})

    table = pa.table(frame)
    assert isinstance(table.field("a").type, ArrowPeriodType)

    roundtripped = table.to_pandas()
    assert isinstance(roundtripped["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(roundtripped, frame)

    doubled = pa.concat_tables([table, table]).to_pandas()
    tm.assert_frame_equal(doubled, pd.concat([frame, frame], ignore_index=True))
422
423
@pyarrow_skip
def test_arrow_table_roundtrip_without_metadata():
    # The period dtype must survive the roundtrip even when the table's
    # schema metadata has been removed.
    import pyarrow as pa

    periods = PeriodArray([1, 2, 3], freq="H")
    periods[1] = pd.NaT
    frame = pd.DataFrame({"a": periods})

    # remove the metadata
    stripped = pa.table(frame).replace_schema_metadata()
    assert stripped.schema.metadata is None

    roundtripped = stripped.to_pandas()
    assert isinstance(roundtripped["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(roundtripped, frame)
440