"""Tests for ``PeriodArray`` / ``period_array``: construction, astype,
setitem, ops, printing, reductions and pyarrow interaction."""
import numpy as np
import pytest

from pandas._libs.tslibs import iNaT
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas.util._test_decorators as td

from pandas.core.dtypes.base import registry
from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import PeriodArray, period_array

# ----------------------------------------------------------------------------
# Dtype


def test_registered():
    """PeriodDtype is registered and "Period[D]" resolves to PeriodDtype("D")."""
    assert PeriodDtype in registry.dtypes
    result = registry.find("Period[D]")
    expected = PeriodDtype("D")
    assert result == expected


# ----------------------------------------------------------------------------
# period_array


@pytest.mark.parametrize(
    "data, freq, expected",
    [
        ([pd.Period("2017", "D")], None, [17167]),
        ([pd.Period("2017", "D")], "D", [17167]),
        ([2017], "D", [17167]),
        (["2017"], "D", [17167]),
        ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
        ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
        (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]),
        (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
        (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]),
    ],
)
def test_period_array_ok(data, freq, expected):
    """period_array(data, freq=freq) yields the expected int64 ``asi8`` values."""
    result = period_array(data, freq=freq).asi8
    expected = np.asarray(expected, dtype=np.int64)
    tm.assert_numpy_array_equal(result, expected)


def test_period_array_readonly_object():
    """A read-only object ndarray of Periods is accepted by the constructors."""
    # https://github.com/pandas-dev/pandas/issues/25403
    pa = period_array([pd.Period("2019-01-01")])
    arr = np.asarray(pa, dtype="object")
    arr.setflags(write=False)

    result = period_array(arr)
    tm.assert_period_array_equal(result, pa)

    result = pd.Series(arr)
    tm.assert_series_equal(result, pd.Series(pa))

    result = pd.DataFrame({"A": arr})
    tm.assert_frame_equal(result, pd.DataFrame({"A": pa}))


def test_from_datetime64_freq_changes():
    """_from_datetime64 with freq="M" on daily datetimes gives monthly periods."""
    # https://github.com/pandas-dev/pandas/issues/23438
    arr = pd.date_range("2017", periods=3, freq="D")
    result = PeriodArray._from_datetime64(arr, freq="M")
    expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M")
    tm.assert_period_array_equal(result, expected)


@pytest.mark.parametrize(
    "data, freq, msg",
    [
        (
            [pd.Period("2017", "D"), pd.Period("2017", "A")],
            None,
            "Input has different freq",
        ),
        ([pd.Period("2017", "D")], "A", "Input has different freq"),
    ],
)
def test_period_array_raises(data, freq, msg):
    """period_array raises IncompatibleFrequency for mismatched frequencies."""
    with pytest.raises(IncompatibleFrequency, match=msg):
        period_array(data, freq)


def test_period_array_non_period_series_raies():
    """PeriodArray raises TypeError when given an integer Series."""
    ser = pd.Series([1, 2, 3])
    with pytest.raises(TypeError, match="dtype"):
        PeriodArray(ser, freq="D")


def test_period_array_freq_mismatch():
    """PeriodArray raises when ``freq`` disagrees with the input array's freq."""
    arr = period_array(["2000", "2001"], freq="D")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        PeriodArray(arr, freq="M")

    # Same check with the frequency given as an offset object.
    with pytest.raises(IncompatibleFrequency, match="freq"):
        PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd())


def test_asi8():
    """``asi8`` exposes the integer values, with iNaT for the missing entry."""
    result = period_array(["2000", "2001", None], freq="D").asi8
    # Explicit dtype, consistent with test_period_array_ok.
    expected = np.array([10957, 11323, iNaT], dtype=np.int64)
    tm.assert_numpy_array_equal(result, expected)


def test_take_raises():
    """take rejects a fill_value with a different freq or of a non-Period type."""
    arr = period_array(["2000", "2001"], freq="D")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))

    msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
    with pytest.raises(TypeError, match=msg):
        arr.take([0, -1], allow_fill=True, fill_value="foo")


@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype(dtype):
    """astype to any integer dtype returns int64 (signed) or uint64 (unsigned)."""
    # We choose to ignore the sign and size of integers for
    # Period/Datetime/Timedelta astype
    arr = period_array(["2000", "2001", None], freq="D")
    result = arr.astype(dtype)

    if np.dtype(dtype).kind == "u":
        expected_dtype = np.dtype("uint64")
    else:
        expected_dtype = np.dtype("int64")
    expected = arr.astype(expected_dtype)

    assert result.dtype == expected_dtype
    tm.assert_numpy_array_equal(result, expected)


def test_astype_copies():
    """astype(copy=False) returns a view of the data; copy=True does not."""
    arr = period_array(["2000", "2001", None], freq="D")
    result = arr.astype(np.int64, copy=False)
    # Add the `.base`, since we now use `.asi8` which returns a view.
    # We could maybe override it in PeriodArray to return ._data directly.
    assert result.base is arr._data

    result = arr.astype(np.int64, copy=True)
    assert result is not arr._data
    # The identity check above also holds for a view, so additionally verify
    # that the copy does not alias the underlying buffer.
    assert not np.shares_memory(result, arr._data)
    tm.assert_numpy_array_equal(result, arr._data.view("i8"))


def test_astype_categorical():
    """astype("category") produces a Categorical with PeriodIndex categories."""
    arr = period_array(["2000", "2001", "2001", None], freq="D")
    result = arr.astype("category")
    categories = pd.PeriodIndex(["2000", "2001"], freq="D")
    expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
    tm.assert_categorical_equal(result, expected)


def test_astype_period():
    """astype(PeriodDtype("M")) converts a daily array to monthly frequency."""
    arr = period_array(["2000", "2001", None], freq="D")
    result = arr.astype(PeriodDtype("M"))
    expected = period_array(["2000", "2001", None], freq="M")
    tm.assert_period_array_equal(result, expected)


@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"])
def test_astype_datetime(other):
    """astype to datetime64[ns] / timedelta64[ns] raises TypeError."""
    arr = period_array(["2000", "2001", None], freq="D")
    # slice off the [ns] so that the regex matches.
    with pytest.raises(TypeError, match=other[:-4]):
        arr.astype(other)


def test_fillna_raises():
    """fillna with an array-like of a different length raises ValueError."""
    arr = period_array(["2000", "2001", "2002"], freq="D")
    with pytest.raises(ValueError, match="Length"):
        arr.fillna(arr[:2])


def test_fillna_copies():
    """fillna returns a new object rather than the original array."""
    arr = period_array(["2000", "2001", "2002"], freq="D")
    result = arr.fillna(pd.Period("2000", "D"))
    assert result is not arr


# ----------------------------------------------------------------------------
# setitem


@pytest.mark.parametrize(
    "key, value, expected",
    [
        ([0], pd.Period("2000", "D"), [10957, 1, 2]),
        ([0], None, [iNaT, 1, 2]),
        ([0], np.nan, [iNaT, 1, 2]),
        ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
        (
            [0, 1, 2],
            [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
            [10957, 11323, 11688],
        ),
    ],
)
def test_setitem(key, value, expected):
    """Setting Periods, None or NaN via ``arr[key] = value`` updates the array."""
    arr = PeriodArray(np.arange(3), freq="D")
    expected = PeriodArray(expected, freq="D")
    arr[key] = value
    tm.assert_period_array_equal(arr, expected)


def test_setitem_raises_incompatible_freq():
    """Setting a scalar or array with a different freq raises."""
    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr[0] = pd.Period("2000", freq="A")

    other = period_array(["2000", "2001"], freq="A")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr[[0, 1]] = other


def test_setitem_raises_length():
    """Setting two positions from a one-element list raises ValueError."""
    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(ValueError, match="length"):
        arr[[0, 1]] = [pd.Period("2000", freq="D")]


def test_setitem_raises_type():
    """Setting a plain integer raises TypeError."""
    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(TypeError, match="int"):
        arr[0] = 1


# ----------------------------------------------------------------------------
# Ops


def test_sub_period():
    """Subtracting a Period with a different freq raises."""
    arr = period_array(["2000", "2001"], freq="D")
    other = pd.Period("2000", freq="M")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr - other


# ----------------------------------------------------------------------------
# Methods


@pytest.mark.parametrize(
    "other",
    [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
)
def test_where_different_freq_raises(other):
    """Series.where with a replacement of a different freq raises."""
    ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
    cond = np.array([True, False, True])
    with pytest.raises(IncompatibleFrequency, match="freq"):
        ser.where(cond, other)


# ----------------------------------------------------------------------------
# Printing


def test_repr_small():
    """str() of a two-element array shows every element."""
    arr = period_array(["2000", "2001"], freq="D")
    result = str(arr)
    expected = (
        "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
    )
    assert result == expected


def test_repr_large():
    """str() of a 1000-element array is truncated with an ellipsis row."""
    arr = period_array(["2000", "2001"] * 500, freq="D")
    result = str(arr)
    expected = (
        "<PeriodArray>\n"
        "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01',\n"
        " ...\n"
        " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01']\n"
        "Length: 1000, dtype: period[D]"
    )
    assert result == expected


# ----------------------------------------------------------------------------
# Reductions


class TestReductions:
    """min / max reductions on PeriodArray."""

    def test_min_max(self):
        """min/max skip NaT by default and return NaT with skipna=False."""
        arr = period_array(
            [
                "2000-01-03",
                "2000-01-03",
                "NaT",
                "2000-01-02",
                "2000-01-05",
                "2000-01-04",
            ],
            freq="D",
        )

        result = arr.min()
        expected = pd.Period("2000-01-02", freq="D")
        assert result == expected

        result = arr.max()
        expected = pd.Period("2000-01-05", freq="D")
        assert result == expected

        result = arr.min(skipna=False)
        assert result is pd.NaT

        result = arr.max(skipna=False)
        assert result is pd.NaT

    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_empty(self, skipna):
        """min/max of an empty array return NaT for either skipna setting."""
        arr = period_array([], freq="D")
        result = arr.min(skipna=skipna)
        assert result is pd.NaT

        result = arr.max(skipna=skipna)
        assert result is pd.NaT


# ----------------------------------------------------------------------------
# Arrow interaction

# Decorator applied to every pyarrow-dependent test below.
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev")


@pyarrow_skip
def test_arrow_extension_type():
    """ArrowPeriodType exposes ``freq`` and compares/hashes by it."""
    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    p1 = ArrowPeriodType("D")
    p2 = ArrowPeriodType("D")
    p3 = ArrowPeriodType("M")

    assert p1.freq == "D"
    assert p1 == p2
    assert not p1 == p3
    assert hash(p1) == hash(p2)
    assert not hash(p1) == hash(p3)


@pyarrow_skip
@pytest.mark.parametrize(
    "data, freq",
    [
        (pd.date_range("2017", periods=3), "D"),
        (pd.date_range("2017", periods=3, freq="A"), "A-DEC"),
    ],
)
def test_arrow_array(data, freq):
    """pa.array(PeriodArray) gives an ArrowPeriodType array over int64 storage."""
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    periods = period_array(data, freq=freq)
    result = pa.array(periods)
    assert isinstance(result.type, ArrowPeriodType)
    assert result.type.freq == freq
    expected = pa.array(periods.asi8, type="int64")
    assert result.storage.equals(expected)

    # convert to its storage type
    result = pa.array(periods, type=pa.int64())
    assert result.equals(expected)

    # unsupported conversions
    msg = "Not supported to convert PeriodArray to 'double' type"
    with pytest.raises(TypeError, match=msg):
        pa.array(periods, type="float64")

    with pytest.raises(TypeError, match="different 'freq'"):
        pa.array(periods, type=ArrowPeriodType("T"))


@pyarrow_skip
def test_arrow_array_missing():
    """A NaT entry becomes a null in the arrow storage array."""
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    arr = PeriodArray([1, 2, 3], freq="D")
    arr[1] = pd.NaT

    result = pa.array(arr)
    assert isinstance(result.type, ArrowPeriodType)
    assert result.type.freq == "D"
    expected = pa.array([1, None, 3], type="int64")
    assert result.storage.equals(expected)


@pyarrow_skip
def test_arrow_table_roundtrip():
    """A period column survives DataFrame -> pa.table -> DataFrame."""
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    arr = PeriodArray([1, 2, 3], freq="D")
    arr[1] = pd.NaT
    df = pd.DataFrame({"a": arr})

    table = pa.table(df)
    assert isinstance(table.field("a").type, ArrowPeriodType)
    result = table.to_pandas()
    assert isinstance(result["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(result, df)

    # The roundtrip also works after concatenating tables.
    table2 = pa.concat_tables([table, table])
    result = table2.to_pandas()
    expected = pd.concat([df, df], ignore_index=True)
    tm.assert_frame_equal(result, expected)


@pyarrow_skip
def test_arrow_table_roundtrip_without_metadata():
    """The roundtrip still restores PeriodDtype once schema metadata is removed."""
    import pyarrow as pa

    arr = PeriodArray([1, 2, 3], freq="H")
    arr[1] = pd.NaT
    df = pd.DataFrame({"a": arr})

    table = pa.table(df)
    # remove the metadata
    table = table.replace_schema_metadata()
    assert table.schema.metadata is None

    result = table.to_pandas()
    assert isinstance(result["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(result, df)