1import re
2
3import numpy as np
4import pytest
5
6from pandas.errors import PerformanceWarning
7
8import pandas as pd
9from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
10import pandas._testing as tm
11
12
@pytest.mark.parametrize(
    "msg,labels,level",
    [
        (r"labels \[4\] not found in level", 4, "a"),
        (r"labels \[7\] not found in level", 7, "b"),
    ],
)
def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level):
    """Dropping a label that is absent from the given level raises KeyError."""
    # GH 8594
    values = [10, 20, 30]
    index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])

    # The Series and the DataFrame must both fail with the same message.
    for obj in (pd.Series(values, index=index), DataFrame(values, index=index)):
        with pytest.raises(KeyError, match=msg):
            obj.drop(labels, level=level)
30
31
@pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")])
def test_drop_errors_ignore(labels, level):
    """With ``errors="ignore"``, dropping a missing level label changes nothing."""
    # GH 8594
    values = [10, 20, 30]
    index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])

    ser = pd.Series(values, index=index)
    tm.assert_series_equal(ser, ser.drop(labels, level=level, errors="ignore"))

    frame = DataFrame(values, index=index)
    tm.assert_frame_equal(frame, frame.drop(labels, level=level, errors="ignore"))
44
45
def test_drop_with_non_unique_datetime_index_and_invalid_keys():
    """Dropping labels missing from a non-unique datetime index raises KeyError."""
    # GH 30399

    # start from a frame whose hourly datetime index is unique
    unique_df = DataFrame(
        np.random.randn(5, 3),
        columns=["a", "b", "c"],
        index=pd.date_range("2012", freq="H", periods=5),
    )

    # taking position 2 twice repeats one timestamp in the index
    duplicated_df = unique_df.iloc[[0, 2, 2, 3]].copy()

    # "a" and "b" are column names here, not index labels
    with pytest.raises(KeyError, match="not found in axis"):
        duplicated_df.drop(["a", "b"])
60
61
62class TestDataFrameDrop:
63    def test_drop_names(self):
64        df = DataFrame(
65            [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
66            index=["a", "b", "c"],
67            columns=["d", "e", "f"],
68        )
69        df.index.name, df.columns.name = "first", "second"
70        df_dropped_b = df.drop("b")
71        df_dropped_e = df.drop("e", axis=1)
72        df_inplace_b, df_inplace_e = df.copy(), df.copy()
73        return_value = df_inplace_b.drop("b", inplace=True)
74        assert return_value is None
75        return_value = df_inplace_e.drop("e", axis=1, inplace=True)
76        assert return_value is None
77        for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e):
78            assert obj.index.name == "first"
79            assert obj.columns.name == "second"
80        assert list(df.columns) == ["d", "e", "f"]
81
82        msg = r"\['g'\] not found in axis"
83        with pytest.raises(KeyError, match=msg):
84            df.drop(["g"])
85        with pytest.raises(KeyError, match=msg):
86            df.drop(["g"], 1)
87
88        # errors = 'ignore'
89        dropped = df.drop(["g"], errors="ignore")
90        expected = Index(["a", "b", "c"], name="first")
91        tm.assert_index_equal(dropped.index, expected)
92
93        dropped = df.drop(["b", "g"], errors="ignore")
94        expected = Index(["a", "c"], name="first")
95        tm.assert_index_equal(dropped.index, expected)
96
97        dropped = df.drop(["g"], axis=1, errors="ignore")
98        expected = Index(["d", "e", "f"], name="second")
99        tm.assert_index_equal(dropped.columns, expected)
100
101        dropped = df.drop(["d", "g"], axis=1, errors="ignore")
102        expected = Index(["e", "f"], name="second")
103        tm.assert_index_equal(dropped.columns, expected)
104
105        # GH 16398
106        dropped = df.drop([], errors="ignore")
107        expected = Index(["a", "b", "c"], name="first")
108        tm.assert_index_equal(dropped.index, expected)
109
110    def test_drop(self):
111        simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]})
112        tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]])
113        tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]])
114        tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])
115        tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :])
116
117        with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
118            simple.drop(5)
119        with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
120            simple.drop("C", 1)
121        with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
122            simple.drop([1, 5])
123        with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
124            simple.drop(["A", "C"], 1)
125
126        # errors = 'ignore'
127        tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple)
128        tm.assert_frame_equal(
129            simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :]
130        )
131        tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple)
132        tm.assert_frame_equal(
133            simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]]
134        )
135
136        # non-unique - wheee!
137        nu_df = DataFrame(
138            list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"]
139        )
140        tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]])
141        tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"])
142        tm.assert_frame_equal(nu_df.drop([]), nu_df)  # GH 16398
143
144        nu_df = nu_df.set_index(Index(["X", "Y", "X"]))
145        nu_df.columns = list("abc")
146        tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :])
147        tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :])
148
149        # inplace cache issue
150        # GH#5628
151        df = DataFrame(np.random.randn(10, 3), columns=list("abc"))
152        expected = df[~(df.b > 0)]
153        return_value = df.drop(labels=df[df.b > 0].index, inplace=True)
154        assert return_value is None
155        tm.assert_frame_equal(df, expected)
156
157    def test_drop_multiindex_not_lexsorted(self):
158        # GH#11640
159
160        # define the lexsorted version
161        lexsorted_mi = MultiIndex.from_tuples(
162            [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
163        )
164        lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
165        assert lexsorted_df.columns.is_lexsorted()
166
167        # define the non-lexsorted version
168        not_lexsorted_df = DataFrame(
169            columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
170        )
171        not_lexsorted_df = not_lexsorted_df.pivot_table(
172            index="a", columns=["b", "c"], values="d"
173        )
174        not_lexsorted_df = not_lexsorted_df.reset_index()
175        assert not not_lexsorted_df.columns.is_lexsorted()
176
177        # compare the results
178        tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
179
180        expected = lexsorted_df.drop("a", axis=1)
181        with tm.assert_produces_warning(PerformanceWarning):
182            result = not_lexsorted_df.drop("a", axis=1)
183
184        tm.assert_frame_equal(result, expected)
185
186    def test_drop_api_equivalence(self):
187        # equivalence of the labels/axis and index/columns API's (GH#12392)
188        df = DataFrame(
189            [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
190            index=["a", "b", "c"],
191            columns=["d", "e", "f"],
192        )
193
194        res1 = df.drop("a")
195        res2 = df.drop(index="a")
196        tm.assert_frame_equal(res1, res2)
197
198        res1 = df.drop("d", 1)
199        res2 = df.drop(columns="d")
200        tm.assert_frame_equal(res1, res2)
201
202        res1 = df.drop(labels="e", axis=1)
203        res2 = df.drop(columns="e")
204        tm.assert_frame_equal(res1, res2)
205
206        res1 = df.drop(["a"], axis=0)
207        res2 = df.drop(index=["a"])
208        tm.assert_frame_equal(res1, res2)
209
210        res1 = df.drop(["a"], axis=0).drop(["d"], axis=1)
211        res2 = df.drop(index=["a"], columns=["d"])
212        tm.assert_frame_equal(res1, res2)
213
214        msg = "Cannot specify both 'labels' and 'index'/'columns'"
215        with pytest.raises(ValueError, match=msg):
216            df.drop(labels="a", index="b")
217
218        with pytest.raises(ValueError, match=msg):
219            df.drop(labels="a", columns="b")
220
221        msg = "Need to specify at least one of 'labels', 'index' or 'columns'"
222        with pytest.raises(ValueError, match=msg):
223            df.drop(axis=1)
224
    # Two identical rows, used to build the frames that parametrize
    # test_raise_on_drop_duplicate_index.
    data = [[1, 2, 3], [1, 2, 3]]
226
227    @pytest.mark.parametrize(
228        "actual",
229        [
230            DataFrame(data=data, index=["a", "a"]),
231            DataFrame(data=data, index=["a", "b"]),
232            DataFrame(data=data, index=["a", "b"]).set_index([0, 1]),
233            DataFrame(data=data, index=["a", "a"]).set_index([0, 1]),
234        ],
235    )
236    def test_raise_on_drop_duplicate_index(self, actual):
237
238        # GH#19186
239        level = 0 if isinstance(actual.index, MultiIndex) else None
240        msg = re.escape("\"['c'] not found in axis\"")
241        with pytest.raises(KeyError, match=msg):
242            actual.drop("c", level=level, axis=0)
243        with pytest.raises(KeyError, match=msg):
244            actual.T.drop("c", level=level, axis=1)
245        expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore")
246        tm.assert_frame_equal(expected_no_err, actual)
247        expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore")
248        tm.assert_frame_equal(expected_no_err.T, actual)
249
250    @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]])
251    @pytest.mark.parametrize("drop_labels", [[], [1], [2]])
252    def test_drop_empty_list(self, index, drop_labels):
253        # GH#21494
254        expected_index = [i for i in index if i not in drop_labels]
255        frame = DataFrame(index=index).drop(drop_labels)
256        tm.assert_frame_equal(frame, DataFrame(index=expected_index))
257
258    @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]])
259    @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]])
260    def test_drop_non_empty_list(self, index, drop_labels):
261        # GH# 21494
262        with pytest.raises(KeyError, match="not found in axis"):
263            DataFrame(index=index).drop(drop_labels)
264
265    def test_mixed_depth_drop(self):
266        arrays = [
267            ["a", "top", "top", "routine1", "routine1", "routine2"],
268            ["", "OD", "OD", "result1", "result2", "result1"],
269            ["", "wx", "wy", "", "", ""],
270        ]
271
272        tuples = sorted(zip(*arrays))
273        index = MultiIndex.from_tuples(tuples)
274        df = DataFrame(np.random.randn(4, 6), columns=index)
275
276        result = df.drop("a", axis=1)
277        expected = df.drop([("a", "", "")], axis=1)
278        tm.assert_frame_equal(expected, result)
279
280        result = df.drop(["top"], axis=1)
281        expected = df.drop([("top", "OD", "wx")], axis=1)
282        expected = expected.drop([("top", "OD", "wy")], axis=1)
283        tm.assert_frame_equal(expected, result)
284
285        result = df.drop(("top", "OD", "wx"), axis=1)
286        expected = df.drop([("top", "OD", "wx")], axis=1)
287        tm.assert_frame_equal(expected, result)
288
289        expected = df.drop([("top", "OD", "wy")], axis=1)
290        expected = df.drop("top", axis=1)
291
292        result = df.drop("result1", level=1, axis=1)
293        expected = df.drop(
294            [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1
295        )
296        tm.assert_frame_equal(expected, result)
297
298    def test_drop_multiindex_other_level_nan(self):
299        # GH#12754
300        df = (
301            DataFrame(
302                {
303                    "A": ["one", "one", "two", "two"],
304                    "B": [np.nan, 0.0, 1.0, 2.0],
305                    "C": ["a", "b", "c", "c"],
306                    "D": [1, 2, 3, 4],
307                }
308            )
309            .set_index(["A", "B", "C"])
310            .sort_index()
311        )
312        result = df.drop("c", level="C")
313        expected = DataFrame(
314            [2, 1],
315            columns=["D"],
316            index=MultiIndex.from_tuples(
317                [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"]
318            ),
319        )
320        tm.assert_frame_equal(result, expected)
321
322    def test_drop_nonunique(self):
323        df = DataFrame(
324            [
325                ["x-a", "x", "a", 1.5],
326                ["x-a", "x", "a", 1.2],
327                ["z-c", "z", "c", 3.1],
328                ["x-a", "x", "a", 4.1],
329                ["x-b", "x", "b", 5.1],
330                ["x-b", "x", "b", 4.1],
331                ["x-b", "x", "b", 2.2],
332                ["y-a", "y", "a", 1.2],
333                ["z-b", "z", "b", 2.1],
334            ],
335            columns=["var1", "var2", "var3", "var4"],
336        )
337
338        grp_size = df.groupby("var1").size()
339        drop_idx = grp_size.loc[grp_size == 1]
340
341        idf = df.set_index(["var1", "var2", "var3"])
342
343        # it works! GH#2101
344        result = idf.drop(drop_idx.index, level=0).reset_index()
345        expected = df[-df.var1.isin(drop_idx.index)]
346
347        result.index = expected.index
348
349        tm.assert_frame_equal(result, expected)
350
351    def test_drop_level(self):
352        index = MultiIndex(
353            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
354            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
355            names=["first", "second"],
356        )
357        frame = DataFrame(
358            np.random.randn(10, 3),
359            index=index,
360            columns=Index(["A", "B", "C"], name="exp"),
361        )
362
363        result = frame.drop(["bar", "qux"], level="first")
364        expected = frame.iloc[[0, 1, 2, 5, 6]]
365        tm.assert_frame_equal(result, expected)
366
367        result = frame.drop(["two"], level="second")
368        expected = frame.iloc[[0, 2, 3, 6, 7, 9]]
369        tm.assert_frame_equal(result, expected)
370
371        result = frame.T.drop(["bar", "qux"], axis=1, level="first")
372        expected = frame.iloc[[0, 1, 2, 5, 6]].T
373        tm.assert_frame_equal(result, expected)
374
375        result = frame.T.drop(["two"], axis=1, level="second")
376        expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T
377        tm.assert_frame_equal(result, expected)
378
379    def test_drop_level_nonunique_datetime(self):
380        # GH#12701
381        idx = Index([2, 3, 4, 4, 5], name="id")
382        idxdt = pd.to_datetime(
383            [
384                "201603231400",
385                "201603231500",
386                "201603231600",
387                "201603231600",
388                "201603231700",
389            ]
390        )
391        df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
392        df["tstamp"] = idxdt
393        df = df.set_index("tstamp", append=True)
394        ts = Timestamp("201603231600")
395        assert df.index.is_unique is False
396
397        result = df.drop(ts, level="tstamp")
398        expected = df.loc[idx != 4]
399        tm.assert_frame_equal(result, expected)
400
401    @pytest.mark.parametrize("box", [Series, DataFrame])
402    def test_drop_tz_aware_timestamp_across_dst(self, box):
403        # GH#21761
404        start = Timestamp("2017-10-29", tz="Europe/Berlin")
405        end = Timestamp("2017-10-29 04:00:00", tz="Europe/Berlin")
406        index = pd.date_range(start, end, freq="15min")
407        data = box(data=[1] * len(index), index=index)
408        result = data.drop(start)
409        expected_start = Timestamp("2017-10-29 00:15:00", tz="Europe/Berlin")
410        expected_idx = pd.date_range(expected_start, end, freq="15min")
411        expected = box(data=[1] * len(expected_idx), index=expected_idx)
412        tm.assert_equal(result, expected)
413
414    def test_drop_preserve_names(self):
415        index = MultiIndex.from_arrays(
416            [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"]
417        )
418
419        df = DataFrame(np.random.randn(6, 3), index=index)
420
421        result = df.drop([(0, 2)])
422        assert result.index.names == ("one", "two")
423
424    @pytest.mark.parametrize(
425        "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"]
426    )
427    @pytest.mark.parametrize("inplace", [False, True])
428    def test_inplace_drop_and_operation(self, operation, inplace):
429        # GH#30484
430        df = DataFrame({"x": range(5)})
431        expected = df.copy()
432        df["y"] = range(5)
433        y = df["y"]
434
435        with tm.assert_produces_warning(None):
436            if inplace:
437                df.drop("y", axis=1, inplace=inplace)
438            else:
439                df = df.drop("y", axis=1, inplace=inplace)
440
441            # Perform operation and check result
442            getattr(y, operation)(1)
443            tm.assert_frame_equal(df, expected)
444