"""Pandas version flags and small helpers for building and comparing test data."""

import string

import numpy as np
import pandas as pd
import pandas.testing as tm
from packaging.version import parse as parse_version

PANDAS_VERSION = parse_version(pd.__version__)
PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4")
PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0")
PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0")
PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1")
PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0")
PANDAS_GT_131 = PANDAS_VERSION >= parse_version("1.3.1")
PANDAS_GT_133 = PANDAS_VERSION >= parse_version("1.3.3")
# Compare release tuples instead of parsed versions so that pre-releases
# (e.g. 1.4.0rc0, which sorts *before* 1.4.0) are included, while every
# later release still satisfies the flag.
PANDAS_GT_140 = PANDAS_VERSION.release >= (1, 4, 0)


def assert_categorical_equal(left, right, *args, **kwargs):
    """Assert that two categorical arrays are equal.

    The arrays are first compared with
    ``pandas.testing.assert_extension_array_equal`` (extra positional and
    keyword arguments are forwarded to it), then both dtypes are checked to
    be categorical.

    Raises
    ------
    AssertionError
        If the arrays differ or either dtype is not categorical.
    """
    tm.assert_extension_array_equal(left, right, *args, **kwargs)
    # isinstance check replaces the deprecated pd.api.types.is_categorical_dtype
    assert isinstance(
        left.dtype, pd.CategoricalDtype
    ), f"{left} is not categorical dtype"
    assert isinstance(
        right.dtype, pd.CategoricalDtype
    ), f"{right} is not categorical dtype"


def assert_numpy_array_equal(left, right):
    """Assert that two arrays are equal, treating missing values as equal.

    The missing-value masks (per ``pd.isna``) must match, and the remaining
    non-missing elements must compare equal.

    Raises
    ------
    AssertionError
        If the masks or the non-missing values differ.
    """
    left_na = pd.isna(left)
    right_na = pd.isna(right)
    np.testing.assert_array_equal(left_na, right_na)

    # Compare only the valid entries; NaN != NaN would otherwise fail.
    left_valid = left[~left_na]
    right_valid = right[~right_na]
    np.testing.assert_array_equal(left_valid, right_valid)


def makeDataFrame():
    """Return a 30x4 frame of random normal floats.

    Columns are ``A``-``D``; the index is the first 30 ASCII letters.
    """
    data = np.random.randn(30, 4)
    index = list(string.ascii_letters)[:30]
    return pd.DataFrame(data, index=index, columns=list("ABCD"))


def makeTimeDataFrame():
    """Return ``makeDataFrame()`` re-indexed with ``makeDateIndex()``."""
    data = makeDataFrame()
    data.index = makeDateIndex()
    return data


def makeTimeSeries():
    """Return column ``A`` of ``makeTimeDataFrame()`` as a Series."""
    return makeTimeDataFrame()["A"]


def makeDateIndex(k=30, freq="B"):
    """Return a ``DatetimeIndex`` of ``k`` periods starting at "2000".

    Parameters
    ----------
    k : int
        Number of periods.
    freq : str
        Frequency passed to ``pd.date_range``.
    """
    return pd.date_range("2000", periods=k, freq=freq)


def makeTimedeltaIndex(k=30, freq="D"):
    """Return a ``TimedeltaIndex`` of ``k`` periods starting at "1 day".

    Parameters
    ----------
    k : int
        Number of periods.
    freq : str
        Frequency passed to ``pd.timedelta_range``.
    """
    return pd.timedelta_range("1 day", periods=k, freq=freq)


def makeMissingDataframe():
    """Return a frame like ``makeDataFrame()`` with values > 1 set to NaN."""
    df = makeDataFrame()
    data = df.values
    data = np.where(data > 1, np.nan, data)
    return pd.DataFrame(data, index=df.index, columns=df.columns)


def makeMixedDataFrame():
    """Return a 5-row frame with float, string and datetime columns."""
    df = pd.DataFrame(
        {
            "A": [0.0, 1, 2, 3, 4],
            "B": [0.0, 1, 0, 1, 0],
            "C": [f"foo{i}" for i in range(5)],
            "D": pd.date_range("2009-01-01", periods=5),
        }
    )
    return df