from itertools import product

import numpy as np
import pytest

from pandas import DataFrame, NaT, date_range
import pandas._testing as tm


@pytest.fixture(params=product([True, False], repeat=2))
def close_open_fixture(request):
    """
    Fixture yielding every ordered pair of booleans

    Parametrized over (True, True), (True, False), (False, True) and
    (False, False); the current pair is returned unchanged.
    NOTE(review): presumably the two flags describe whether each end of an
    interval is closed -- confirm against the tests that consume it.
    """
    return request.param


@pytest.fixture
def float_frame_with_na():
    """
    Fixture for a float DataFrame, indexed by unique strings, with holes

    Columns are ['A', 'B', 'C', 'D']. Rows 5-9 are NaN in every column and
    rows 15-19 are NaN in the last two columns.

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
    soujjZ0A08       NaN       NaN       NaN       NaN
    7W6NLGsjB9       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    # punch the holes: a band of fully-missing rows, then a partial band
    frame.iloc[5:10] = np.nan
    frame.iloc[15:20, -2:] = np.nan
    return frame


@pytest.fixture
def bool_frame_with_na():
    """
    Fixture for an object-dtype DataFrame of booleans with holes

    Index is made of unique strings and columns are ['A', 'B', 'C', 'D'].
    Rows 5-9 are NaN in every column and rows 15-19 are NaN in the last two
    columns; the leading diagonal of the first four rows is forced to True.

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    ctjdvZSR6R   True  False   True   True
    AVTujptmxb  False   True  False   True
    G9lrImrSWq  False  False  False   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    s15ptEJnRb    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    UW41KkDyZ4   True   True  False  False
    l9l6XkOdqV   True  False  False  False
    X2MeZfzDYA  False   True  False  False
    xWkIKU7vfX  False   True  False   True
    QOhL6VmpGU  False  False  False   True
    22PwkRJdat  False   True  False  False
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    # positive entries become True; object dtype lets NaN sit next to bools
    frame = (DataFrame(tm.getSeriesData()) > 0).astype(object)

    # punch the holes
    frame.iloc[5:10] = np.nan
    frame.iloc[15:20, -2:] = np.nan

    # `any` tests need at least one True ahead of the first NaN in every
    # column, so pin the diagonal of the leading 4x4 corner
    for pos in range(4):
        frame.iloc[pos, pos] = True
    return frame


@pytest.fixture
def float_string_frame():
    """
    Fixture for a DataFrame mixing float columns with one string column

    Index is made of unique strings and columns are
    ['A', 'B', 'C', 'D', 'foo']; every entry of 'foo' is the string "bar".

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    3WSJ7BUCGd  2.484964  0.213829  0.034778 -2.327831  bar
    khdAmufk0U -0.193480 -0.743518 -0.077987  0.153646  bar
    LE2DZiFlrE -0.193566 -1.343194 -0.107321  0.959978  bar
    HJXSJhVn7b  0.142590  1.257603 -0.659409 -0.223844  bar
    ...              ...       ...       ...       ...  ...
    9a1Vypttgw -1.316394  1.601354  0.173596  1.213196  bar
    h5d1gVFbEy  0.609475  1.106738 -0.155271  0.294630  bar
    mK9LsTQG92  1.303613  0.857040 -1.019153  0.369468  bar
    oOLksd9gKH  0.558219 -0.134491 -0.289869 -0.951033  bar
    9jgoOjKyHg  0.058270 -0.496110 -0.413212 -0.852659  bar
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    frame["foo"] = "bar"
    return frame


@pytest.fixture
def mixed_float_frame():
    """
    Fixture for a DataFrame whose columns use different float widths

    Index is made of unique strings and columns are ['A', 'B', 'C', 'D'],
    cast to float32, float32, float16 and float64 respectively.

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    target_dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}
    # recast one column at a time, in A..D order
    for label, dtype in target_dtypes.items():
        frame[label] = frame[label].astype(dtype)
    return frame


@pytest.fixture
def mixed_int_frame():
    """
    Fixture for a DataFrame whose columns use different integer types

    Index is made of unique strings and columns are ['A', 'B', 'C', 'D']:
    'A' is int32, 'B' is a uint64 column of ones, 'C' is uint8 and 'D' is
    the uint8 'C' column widened to int64 (so 'C' and 'D' hold equal values).

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    as_ints = {
        label: series.astype(int) for label, series in tm.getSeriesData().items()
    }
    frame = DataFrame(as_ints)
    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # D is taken from the already-narrowed C, not from the original D
    frame["D"] = frame["C"].astype("int64")
    return frame


@pytest.fixture
def mixed_type_frame():
    """
    Fixture for a 10-row DataFrame with float, int and string columns

    Columns are ['a', 'b', 'c', 'float32', 'int32']; the index is built from
    np.arange(10). 'a', 'b' and 'c' are scalars broadcast over the index.
    """
    columns = {
        "a": 1.0,
        "b": 2,
        "c": "foo",
        "float32": np.ones(10, dtype="float32"),
        "int32": np.ones(10, dtype="int32"),
    }
    return DataFrame(columns, index=np.arange(10))


@pytest.fixture
def timezone_frame():
    """
    Fixture for a DataFrame of date ranges in different time zones

    Columns are ['A', 'B', 'C']: 'A' has no time zone, 'B' is US/Eastern and
    'C' is CET. The middle row of 'B' and 'C' is NaT.

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    zone_by_column = {"A": None, "B": "US/Eastern", "C": "CET"}
    frame = DataFrame(
        {
            label: date_range("20130101", periods=3, tz=zone)
            for label, zone in zone_by_column.items()
        }
    )
    # blank the middle row of the two tz-aware columns
    for col_pos in (1, 2):
        frame.iloc[1, col_pos] = NaT
    return frame


@pytest.fixture
def uint64_frame():
    """
    Fixture for a DataFrame constructed with dtype uint64

    Columns are ['A', 'B']; 'B' holds values starting at 2**63.
    """
    base = 2**63
    columns = {"A": np.arange(3), "B": [base, base + 5, base + 10]}
    return DataFrame(columns, dtype=np.uint64)


@pytest.fixture
def simple_frame():
    """
    Fixture for a small 3x3 float DataFrame

    Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].

       one  two  three
    a  1.0  2.0    3.0
    b  4.0  5.0    6.0
    c  7.0  8.0    9.0
    """
    rows = [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
    ]
    return DataFrame(
        np.array(rows), columns=["one", "two", "three"], index=["a", "b", "c"]
    )


@pytest.fixture
def frame_of_index_cols():
    """
    Fixture for a DataFrame whose columns can serve as index keys

    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
    'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.
    The three numeric columns are drawn from np.random.randn on each use.

         A      B  C         D         E  (tuple, as, label)
    0  foo    one  a  0.608477 -0.012500           -1.664297
    1  foo    two  b -0.633460  0.249614           -0.364411
    2  foo  three  c  0.615256  2.154968           -0.834666
    3  bar    one  d  0.234246  1.085675            0.718445
    4  bar    two  e  0.533841 -0.005702           -3.533912
    """
    columns = {
        "A": ["foo", "foo", "foo", "bar", "bar"],
        "B": ["one", "two", "three", "one", "two"],
        "C": ["a", "b", "c", "d", "e"],
        # random draws stay in D, E, tuple order
        "D": np.random.randn(5),
        "E": np.random.randn(5),
        ("tuple", "as", "label"): np.random.randn(5),
    }
    return DataFrame(columns)