1from copy import deepcopy
2import inspect
3import pydoc
4
5import numpy as np
6import pytest
7
8import pandas.util._test_decorators as td
9from pandas.util._test_decorators import async_mark, skip_if_no
10
11import pandas as pd
12from pandas import DataFrame, Series, date_range, timedelta_range
13import pandas._testing as tm
14
15
16class TestDataFrameMisc:
17    def test_getitem_pop_assign_name(self, float_frame):
18        s = float_frame["A"]
19        assert s.name == "A"
20
21        s = float_frame.pop("A")
22        assert s.name == "A"
23
24        s = float_frame.loc[:, "B"]
25        assert s.name == "B"
26
27        s2 = s.loc[:]
28        assert s2.name == "B"
29
30    def test_get_axis(self, float_frame):
31        f = float_frame
32        assert f._get_axis_number(0) == 0
33        assert f._get_axis_number(1) == 1
34        assert f._get_axis_number("index") == 0
35        assert f._get_axis_number("rows") == 0
36        assert f._get_axis_number("columns") == 1
37
38        assert f._get_axis_name(0) == "index"
39        assert f._get_axis_name(1) == "columns"
40        assert f._get_axis_name("index") == "index"
41        assert f._get_axis_name("rows") == "index"
42        assert f._get_axis_name("columns") == "columns"
43
44        assert f._get_axis(0) is f.index
45        assert f._get_axis(1) is f.columns
46
47        with pytest.raises(ValueError, match="No axis named"):
48            f._get_axis_number(2)
49
50        with pytest.raises(ValueError, match="No axis.*foo"):
51            f._get_axis_name("foo")
52
53        with pytest.raises(ValueError, match="No axis.*None"):
54            f._get_axis_name(None)
55
56        with pytest.raises(ValueError, match="No axis named"):
57            f._get_axis_number(None)
58
59    def test_column_contains_raises(self, float_frame):
60        with pytest.raises(TypeError, match="unhashable type: 'Index'"):
61            float_frame.columns in float_frame
62
63    def test_tab_completion(self):
64        # DataFrame whose columns are identifiers shall have them in __dir__.
65        df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD"))
66        for key in list("ABCD"):
67            assert key in dir(df)
68        assert isinstance(df.__getitem__("A"), pd.Series)
69
70        # DataFrame whose first-level columns are identifiers shall have
71        # them in __dir__.
72        df = DataFrame(
73            [list("abcd"), list("efgh")],
74            columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))),
75        )
76        for key in list("ABCD"):
77            assert key in dir(df)
78        for key in list("EFGH"):
79            assert key not in dir(df)
80        assert isinstance(df.__getitem__("A"), pd.DataFrame)
81
82    def test_not_hashable(self):
83        empty_frame = DataFrame()
84
85        df = DataFrame([1])
86        msg = "'DataFrame' objects are mutable, thus they cannot be hashed"
87        with pytest.raises(TypeError, match=msg):
88            hash(df)
89        with pytest.raises(TypeError, match=msg):
90            hash(empty_frame)
91
92    def test_column_name_contains_unicode_surrogate(self):
93        # GH 25509
94        colname = "\ud83d"
95        df = DataFrame({colname: []})
96        # this should not crash
97        assert colname not in dir(df)
98        assert df.columns[0] == colname
99
100    def test_new_empty_index(self):
101        df1 = DataFrame(np.random.randn(0, 3))
102        df2 = DataFrame(np.random.randn(0, 3))
103        df1.index.name = "foo"
104        assert df2.index.name is None
105
106    def test_get_agg_axis(self, float_frame):
107        cols = float_frame._get_agg_axis(0)
108        assert cols is float_frame.columns
109
110        idx = float_frame._get_agg_axis(1)
111        assert idx is float_frame.index
112
113        msg = r"Axis must be 0 or 1 \(got 2\)"
114        with pytest.raises(ValueError, match=msg):
115            float_frame._get_agg_axis(2)
116
117    def test_empty(self, float_frame, float_string_frame):
118        empty_frame = DataFrame()
119        assert empty_frame.empty
120
121        assert not float_frame.empty
122        assert not float_string_frame.empty
123
124        # corner case
125        df = DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3))
126        del df["A"]
127        assert not df.empty
128
129    def test_len(self, float_frame):
130        assert len(float_frame) == len(float_frame.index)
131
132        # single block corner case
133        arr = float_frame[["A", "B"]].values
134        expected = float_frame.reindex(columns=["A", "B"]).values
135        tm.assert_almost_equal(arr, expected)
136
137    def test_axis_aliases(self, float_frame):
138        f = float_frame
139
140        # reg name
141        expected = f.sum(axis=0)
142        result = f.sum(axis="index")
143        tm.assert_series_equal(result, expected)
144
145        expected = f.sum(axis=1)
146        result = f.sum(axis="columns")
147        tm.assert_series_equal(result, expected)
148
149    def test_class_axis(self):
150        # GH 18147
151        # no exception and no empty docstring
152        assert pydoc.getdoc(DataFrame.index)
153        assert pydoc.getdoc(DataFrame.columns)
154
155    def test_series_put_names(self, float_string_frame):
156        series = float_string_frame._series
157        for k, v in series.items():
158            assert v.name == k
159
160    def test_empty_nonzero(self):
161        df = DataFrame([1, 2, 3])
162        assert not df.empty
163        df = DataFrame(index=[1], columns=[1])
164        assert not df.empty
165        df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna()
166        assert df.empty
167        assert df.T.empty
168        empty_frames = [
169            DataFrame(),
170            DataFrame(index=[1]),
171            DataFrame(columns=[1]),
172            DataFrame({1: []}),
173        ]
174        for df in empty_frames:
175            assert df.empty
176            assert df.T.empty
177
178    def test_with_datetimelikes(self):
179
180        df = DataFrame(
181            {
182                "A": date_range("20130101", periods=10),
183                "B": timedelta_range("1 day", periods=10),
184            }
185        )
186        t = df.T
187
188        result = t.dtypes.value_counts()
189        expected = Series({np.dtype("object"): 10})
190        tm.assert_series_equal(result, expected)
191
192    def test_deepcopy(self, float_frame):
193        cp = deepcopy(float_frame)
194        series = cp["A"]
195        series[:] = 10
196        for idx, value in series.items():
197            assert float_frame["A"][idx] != value
198
199    def test_inplace_return_self(self):
200        # GH 1893
201
202        data = DataFrame(
203            {"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]}
204        )
205
206        def _check_f(base, f):
207            result = f(base)
208            assert result is None
209
210        # -----DataFrame-----
211
212        # set_index
213        f = lambda x: x.set_index("a", inplace=True)
214        _check_f(data.copy(), f)
215
216        # reset_index
217        f = lambda x: x.reset_index(inplace=True)
218        _check_f(data.set_index("a"), f)
219
220        # drop_duplicates
221        f = lambda x: x.drop_duplicates(inplace=True)
222        _check_f(data.copy(), f)
223
224        # sort
225        f = lambda x: x.sort_values("b", inplace=True)
226        _check_f(data.copy(), f)
227
228        # sort_index
229        f = lambda x: x.sort_index(inplace=True)
230        _check_f(data.copy(), f)
231
232        # fillna
233        f = lambda x: x.fillna(0, inplace=True)
234        _check_f(data.copy(), f)
235
236        # replace
237        f = lambda x: x.replace(1, 0, inplace=True)
238        _check_f(data.copy(), f)
239
240        # rename
241        f = lambda x: x.rename({1: "foo"}, inplace=True)
242        _check_f(data.copy(), f)
243
244        # -----Series-----
245        d = data.copy()["c"]
246
247        # reset_index
248        f = lambda x: x.reset_index(inplace=True, drop=True)
249        _check_f(data.set_index("a")["c"], f)
250
251        # fillna
252        f = lambda x: x.fillna(0, inplace=True)
253        _check_f(d.copy(), f)
254
255        # replace
256        f = lambda x: x.replace(1, 0, inplace=True)
257        _check_f(d.copy(), f)
258
259        # rename
260        f = lambda x: x.rename({1: "foo"}, inplace=True)
261        _check_f(d.copy(), f)
262
263    @async_mark()
264    @td.check_file_leaks
265    async def test_tab_complete_warning(self, ip, frame_or_series):
266        # GH 16409
267        pytest.importorskip("IPython", minversion="6.0.0")
268        from IPython.core.completer import provisionalcompleter
269
270        if frame_or_series is DataFrame:
271            code = "from pandas import DataFrame; obj = DataFrame()"
272        else:
273            code = "from pandas import Series; obj = Series(dtype=object)"
274
275        await ip.run_code(code)
276
277        # GH 31324 newer jedi version raises Deprecation warning;
278        #  appears resolved 2021-02-02
279        with tm.assert_produces_warning(None):
280            with provisionalcompleter("ignore"):
281                list(ip.Completer.completions("obj.", 1))
282
283    def test_attrs(self):
284        df = DataFrame({"A": [2, 3]})
285        assert df.attrs == {}
286        df.attrs["version"] = 1
287
288        result = df.rename(columns=str)
289        assert result.attrs == {"version": 1}
290
291    @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
292    def test_set_flags(self, allows_duplicate_labels, frame_or_series):
293        obj = DataFrame({"A": [1, 2]})
294        key = (0, 0)
295        if frame_or_series is Series:
296            obj = obj["A"]
297            key = 0
298
299        result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels)
300
301        if allows_duplicate_labels is None:
302            # We don't update when it's not provided
303            assert result.flags.allows_duplicate_labels is True
304        else:
305            assert result.flags.allows_duplicate_labels is allows_duplicate_labels
306
307        # We made a copy
308        assert obj is not result
309
310        # We didn't mutate obj
311        assert obj.flags.allows_duplicate_labels is True
312
313        # But we didn't copy data
314        result.iloc[key] = 0
315        assert obj.iloc[key] == 0
316
317        # Now we do copy.
318        result = obj.set_flags(
319            copy=True, allows_duplicate_labels=allows_duplicate_labels
320        )
321        result.iloc[key] = 10
322        assert obj.iloc[key] == 0
323
324    def test_constructor_expanddim_lookup(self):
325        # GH#33628 accessing _constructor_expanddim should not
326        #  raise NotImplementedError
327        df = DataFrame()
328
329        with pytest.raises(NotImplementedError, match="Not supported for DataFrames!"):
330            df._constructor_expanddim(np.arange(27).reshape(3, 3, 3))
331
332    @skip_if_no("jinja2")
333    def test_inspect_getmembers(self):
334        # GH38740
335        df = DataFrame()
336        with tm.assert_produces_warning(None):
337            inspect.getmembers(df)
338