1from itertools import product
2
3import numpy as np
4import pytest
5
6from pandas import DataFrame, NaT, date_range
7import pandas._testing as tm
8
9
@pytest.fixture(params=product((True, False), repeat=2))
def close_open_fixture(request):
    """
    Fixture parametrized over all four ``(bool, bool)`` combinations

    A test using this fixture runs once per pair, in the order
    ``(True, True)``, ``(True, False)``, ``(False, True)``, ``(False, False)``.

    NOTE(review): judging by the name, the two flags presumably select
    closed/open interval ends -- confirm against the consuming tests.
    """
    return request.param
13
14
@pytest.fixture
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] and there are 30 rows, as produced by
    ``tm.getSeriesData()``; some entries are missing:

    * positional rows 5-9 are NaN in every column
    * positional rows 15-19 are NaN in the last two columns only

    Every other cell keeps the value it was created with.
    """
    fully_missing_rows = slice(5, 10)
    partly_missing_rows = slice(15, 20)
    last_two_columns = slice(-2, None)

    frame = DataFrame(tm.getSeriesData())
    # Knock out one band of complete rows ...
    frame.iloc[fully_missing_rows] = np.nan
    # ... and a second band restricted to the trailing two columns.
    frame.iloc[partly_missing_rows, last_two_columns] = np.nan
    return frame
46
47
@pytest.fixture
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] and there are 30 rows, as produced by
    ``tm.getSeriesData()``. Values are the result of ``data > 0`` stored with
    object dtype so that missing entries can sit next to the booleans:

    * positional rows 5-9 are NaN in every column
    * positional rows 15-19 are NaN in the last two columns only
    * the cells at positions (0, 0), (1, 1), (2, 2) and (3, 3) are forced
      to True
    """
    fully_missing_rows = slice(5, 10)
    partly_missing_rows = slice(15, 20)
    last_two_columns = slice(-2, None)

    # Object dtype lets NaN be stored alongside True/False.
    frame = (DataFrame(tm.getSeriesData()) > 0).astype(object)

    frame.iloc[fully_missing_rows] = np.nan
    frame.iloc[partly_missing_rows, last_two_columns] = np.nan

    # `any` tests need at least one True ahead of the first NaN in every
    # column; the diagonal cells of rows 0-3 all precede the first NaN row (5).
    for diag in range(4):
        frame.iloc[diag, diag] = True
    return frame
85
86
@pytest.fixture
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'] over 30 rows: the four float
    columns produced by ``tm.getSeriesData()`` followed by a 'foo' column
    whose value is the string 'bar' in every row.
    """
    frame = DataFrame(tm.getSeriesData())
    # A scalar assignment fills the new column with the same string per row.
    frame["foo"] = "bar"
    return frame
116
117
@pytest.fixture
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] over 30 rows, as produced by
    ``tm.getSeriesData()``, each recast to a specific float width:

    * 'A' and 'B' -> float32
    * 'C'         -> float16
    * 'D'         -> float64
    """
    target_dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}

    frame = DataFrame(tm.getSeriesData())
    # Reassign column by column (in A, B, C, D order) so each one carries
    # its own dtype.
    for column, dtype in target_dtypes.items():
        frame[column] = frame[column].astype(dtype)
    return frame
150
151
@pytest.fixture
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] over 30 rows. The data from
    ``tm.getSeriesData()`` is first truncated to ``int`` and then:

    * 'A' is cast to int32
    * 'B' is replaced by a column of ones with dtype uint64
    * 'C' is cast to uint8
    * 'D' is the uint8 'C' column cast to int64, so 'D' holds the same
      values as 'C'
    """
    as_ints = {name: col.astype(int) for name, col in tm.getSeriesData().items()}
    frame = DataFrame(as_ints)

    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # NOTE(review): D is built from C *after* its uint8 cast, not from the
    # original D data -- confirm this mirroring is intended.
    frame["D"] = frame["C"].astype("int64")
    return frame
184
185
@pytest.fixture
def mixed_type_frame():
    """
    Fixture for DataFrame of float/int/string columns with an integer index

    Columns are ['a', 'b', 'c', 'float32', 'int32'] over 10 rows; the index
    is built from ``np.arange(10)``.

    * 'a' is the scalar 1.0, 'b' the scalar 2 and 'c' the scalar 'foo',
      each repeated for every row
    * 'float32' is ten ones stored as float32
    * 'int32' is ten ones stored as int32
    """
    n_rows = 10
    columns = {
        "a": 1.0,
        "b": 2,
        "c": "foo",
        "float32": np.ones(n_rows, dtype="float32"),
        "int32": np.ones(n_rows, dtype="int32"),
    }
    return DataFrame(columns, index=np.arange(n_rows))
202
203
@pytest.fixture
def timezone_frame():
    """
    Fixture for DataFrame of date_range Series with different time zones

    Columns are ['A', 'B', 'C']: 'A' has no time zone, 'B' is 'US/Eastern'
    and 'C' is 'CET'. The middle row of 'B' and 'C' is set to NaT.

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    zone_for_column = {"A": None, "B": "US/Eastern", "C": "CET"}
    frame = DataFrame(
        {
            column: date_range("20130101", periods=3, tz=zone)
            for column, zone in zone_for_column.items()
        }
    )
    # Blank out the second row of the two tz-aware columns (positions 1 and 2).
    for column_pos in (1, 2):
        frame.iloc[1, column_pos] = NaT
    return frame
226
227
@pytest.fixture
def uint64_frame():
    """
    Fixture for DataFrame with uint64 values

    Columns are ['A', 'B']: 'A' holds 0, 1, 2 and 'B' holds 2**63,
    2**63 + 5 and 2**63 + 10. The frame is constructed with
    ``dtype=np.uint64``.
    """
    base = 2 ** 63
    data = {
        "A": np.arange(3),
        "B": [base + offset for offset in (0, 5, 10)],
    }
    return DataFrame(data, dtype=np.uint64)
238
239
@pytest.fixture
def simple_frame():
    """
    Fixture for simple 3x3 DataFrame

    Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']; the
    values are the floats 1.0 through 9.0 laid out row by row.

       one  two  three
    a  1.0  2.0    3.0
    b  4.0  5.0    6.0
    c  7.0  8.0    9.0
    """
    row_labels = ["a", "b", "c"]
    column_labels = ["one", "two", "three"]
    values = np.array(
        [
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [7.0, 8.0, 9.0],
        ]
    )
    return DataFrame(values, columns=column_labels, index=row_labels)
255
256
@pytest.fixture
def frame_of_index_cols():
    """
    Fixture for DataFrame of columns that can be used for indexing

    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')] over
    5 rows with the default index.

    * 'A' and 'B' are string columns containing duplicates (but jointly
      unique)
    * 'C' holds the unique strings 'a' through 'e'
    * 'D', 'E' and the tuple-labelled column are each filled with 5 draws
      from ``np.random.randn``
    """
    data = {
        "A": ["foo"] * 3 + ["bar"] * 2,
        "B": ["one", "two", "three", "one", "two"],
        "C": list("abcde"),
        # The three random columns are drawn in this order: D, E, tuple label.
        "D": np.random.randn(5),
        "E": np.random.randn(5),
        ("tuple", "as", "label"): np.random.randn(5),
    }
    return DataFrame(data)
283