1import string
2
3import numpy as np
4import pandas as pd
5from packaging.version import parse as parse_version
6
# Parsed version of the installed pandas; every flag below is derived from it.
PANDAS_VERSION = parse_version(pd.__version__)

# Each ``PANDAS_GT_xyz`` flag is True when the installed pandas is at least
# version x.y.z (despite "GT" in the name, the comparison is ``>=``).
PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4")
PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0")
PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0")
PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1")
PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0")
PANDAS_GT_131 = PANDAS_VERSION >= parse_version("1.3.1")
PANDAS_GT_133 = PANDAS_VERSION >= parse_version("1.3.3")
# Compare release tuples rather than full versions so that 1.4.0 pre-releases
# (which sort *below* "1.4.0" under PEP 440) are included, while later
# releases such as 1.4.1 or 2.x are still reported as >= 1.4.0.
PANDAS_GT_140 = PANDAS_VERSION.release >= (1, 4, 0)  # include pre-release
16
17import pandas.testing as tm
18
19
def assert_categorical_equal(left, right, *args, **kwargs):
    """Assert that two categorical arrays are equal.

    The arrays are first compared with
    ``pandas.testing.assert_extension_array_equal`` (extra positional and
    keyword arguments are forwarded to it), then both are checked to have a
    categorical dtype.

    Raises
    ------
    AssertionError
        If the arrays differ or either one is not of categorical dtype.
    """
    tm.assert_extension_array_equal(left, right, *args, **kwargs)
    # ``pd.api.types.is_categorical_dtype`` is deprecated in recent pandas and
    # warns on every call; an isinstance check against ``CategoricalDtype`` is
    # the documented replacement and is equivalent for dtype objects.
    assert isinstance(
        left.dtype, pd.CategoricalDtype
    ), f"{left} is not categorical dtype"
    assert isinstance(
        right.dtype, pd.CategoricalDtype
    ), f"{right} is not categorical dtype"
28
29
def assert_numpy_array_equal(left, right):
    """Assert two arrays are equal, treating missing values as equal.

    Missing entries (as detected by ``pd.isna``) must sit at the same
    positions in both arrays; the remaining entries are then compared
    with ``np.testing.assert_array_equal``.
    """
    missing_left, missing_right = pd.isna(left), pd.isna(right)
    # Missing values must line up before the valid values are compared.
    np.testing.assert_array_equal(missing_left, missing_right)

    np.testing.assert_array_equal(left[~missing_left], right[~missing_right])
38
39
def makeDataFrame():
    """Return a 30x4 DataFrame of standard-normal random values.

    Rows are labelled with the first 30 ASCII letters and the columns
    are "A" through "D". Values are drawn from NumPy's global random state.
    """
    row_labels = list(string.ascii_letters)[:30]
    values = np.random.randn(30, 4)
    return pd.DataFrame(values, index=row_labels, columns=list("ABCD"))
44
45
def makeTimeDataFrame():
    """Return the frame from ``makeDataFrame`` re-indexed by ``makeDateIndex()``."""
    frame = makeDataFrame()
    # Swap the letter labels for the default date index.
    frame.index = makeDateIndex()
    return frame
50
51
def makeTimeSeries():
    """Return column "A" of ``makeTimeDataFrame()`` as a Series."""
    frame = makeTimeDataFrame()
    return frame["A"]
54
55
def makeDateIndex(k=30, freq="B"):
    """Return a date index of ``k`` periods starting at "2000".

    ``freq`` is passed straight to ``pd.date_range`` (business days by
    default).
    """
    return pd.date_range(
        start="2000",
        periods=k,
        freq=freq,
    )
58
59
def makeTimedeltaIndex(k=30, freq="D"):
    """Return a timedelta index of ``k`` periods starting at "1 day".

    ``freq`` is passed straight to ``pd.timedelta_range`` (daily steps by
    default).
    """
    return pd.timedelta_range(
        start="1 day",
        periods=k,
        freq=freq,
    )
62
63
def makeMissingDataframe():
    """Return a ``makeDataFrame()`` frame with every value above 1 set to NaN.

    The index and columns of the generated frame are kept.
    """
    frame = makeDataFrame()
    # Work on a copy so the source frame's buffer is never modified.
    values = frame.values.copy()
    values[values > 1] = np.nan
    return pd.DataFrame(values, index=frame.index, columns=frame.columns)
69
70
def makeMixedDataFrame():
    """Return a fixed 5-row DataFrame with mixed column types.

    Columns: "A" and "B" hold floats, "C" holds the strings "foo0".."foo4",
    and "D" holds five consecutive dates starting at 2009-01-01.
    """
    columns = {
        "A": [0.0, 1, 2, 3, 4],
        "B": [0.0, 1, 0, 1, 0],
        "C": [f"foo{i}" for i in range(5)],
        "D": pd.date_range("2009-01-01", periods=5),
    }
    return pd.DataFrame(columns)
81