1import re 2 3import numpy as np 4import pytest 5 6from pandas.errors import PerformanceWarning 7 8import pandas as pd 9from pandas import DataFrame, Index, MultiIndex, Series, Timestamp 10import pandas._testing as tm 11 12 13@pytest.mark.parametrize( 14 "msg,labels,level", 15 [ 16 (r"labels \[4\] not found in level", 4, "a"), 17 (r"labels \[7\] not found in level", 7, "b"), 18 ], 19) 20def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level): 21 # GH 8594 22 mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) 23 s = pd.Series([10, 20, 30], index=mi) 24 df = DataFrame([10, 20, 30], index=mi) 25 26 with pytest.raises(KeyError, match=msg): 27 s.drop(labels, level=level) 28 with pytest.raises(KeyError, match=msg): 29 df.drop(labels, level=level) 30 31 32@pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")]) 33def test_drop_errors_ignore(labels, level): 34 # GH 8594 35 mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) 36 s = pd.Series([10, 20, 30], index=mi) 37 df = DataFrame([10, 20, 30], index=mi) 38 39 expected_s = s.drop(labels, level=level, errors="ignore") 40 tm.assert_series_equal(s, expected_s) 41 42 expected_df = df.drop(labels, level=level, errors="ignore") 43 tm.assert_frame_equal(df, expected_df) 44 45 46def test_drop_with_non_unique_datetime_index_and_invalid_keys(): 47 # GH 30399 48 49 # define dataframe with unique datetime index 50 df = DataFrame( 51 np.random.randn(5, 3), 52 columns=["a", "b", "c"], 53 index=pd.date_range("2012", freq="H", periods=5), 54 ) 55 # create dataframe with non-unique datetime index 56 df = df.iloc[[0, 2, 2, 3]].copy() 57 58 with pytest.raises(KeyError, match="not found in axis"): 59 df.drop(["a", "b"]) # Dropping with labels not exist in the index 60 61 62class TestDataFrameDrop: 63 def test_drop_names(self): 64 df = DataFrame( 65 [[1, 2, 3], [3, 4, 5], [5, 6, 7]], 66 index=["a", "b", "c"], 67 columns=["d", "e", "f"], 68 ) 69 df.index.name, df.columns.name = "first", "second" 70 df_dropped_b = df.drop("b") 71 df_dropped_e = df.drop("e", axis=1) 72 df_inplace_b, df_inplace_e = df.copy(), df.copy() 73 return_value = df_inplace_b.drop("b", inplace=True) 74 assert return_value is None 75 return_value = df_inplace_e.drop("e", axis=1, inplace=True) 76 assert return_value is None 77 for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e): 78 assert obj.index.name == "first" 79 assert obj.columns.name == "second" 80 assert list(df.columns) == ["d", "e", "f"] 81 82 msg = r"\['g'\] not found in axis" 83 with pytest.raises(KeyError, match=msg): 84 df.drop(["g"]) 85 with pytest.raises(KeyError, match=msg): 86 df.drop(["g"], 1) 87 88 # errors = 'ignore' 89 dropped = df.drop(["g"], errors="ignore") 90 expected = Index(["a", "b", "c"], name="first") 91 tm.assert_index_equal(dropped.index, expected) 92 93 dropped = df.drop(["b", "g"], errors="ignore") 94 expected = Index(["a", "c"], name="first") 95 tm.assert_index_equal(dropped.index, expected) 96 97 dropped = df.drop(["g"], axis=1, errors="ignore") 98 expected = Index(["d", "e", "f"], name="second") 99 tm.assert_index_equal(dropped.columns, expected) 100 101 dropped = df.drop(["d", "g"], axis=1, errors="ignore") 102 expected = Index(["e", "f"], name="second") 103 tm.assert_index_equal(dropped.columns, expected) 104 105 # GH 16398 106 dropped = df.drop([], errors="ignore") 107 expected = Index(["a", "b", "c"], name="first") 108 tm.assert_index_equal(dropped.index, expected) 109 110 def test_drop(self): 111 simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]}) 112 tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]]) 113 tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]]) 114 tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :]) 115 tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :]) 116 117 with pytest.raises(KeyError, match=r"\[5\] not found in axis"): 118 simple.drop(5) 119 with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): 120 simple.drop("C", 1) 121 with pytest.raises(KeyError, match=r"\[5\] not found in axis"): 122 simple.drop([1, 5]) 123 with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): 124 simple.drop(["A", "C"], 1) 125 126 # errors = 'ignore' 127 tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple) 128 tm.assert_frame_equal( 129 simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :] 130 ) 131 tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple) 132 tm.assert_frame_equal( 133 simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]] 134 ) 135 136 # non-unique - wheee! 137 nu_df = DataFrame( 138 list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"] 139 ) 140 tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]]) 141 tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"]) 142 tm.assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398 143 144 nu_df = nu_df.set_index(Index(["X", "Y", "X"])) 145 nu_df.columns = list("abc") 146 tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :]) 147 tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :]) 148 149 # inplace cache issue 150 # GH#5628 151 df = DataFrame(np.random.randn(10, 3), columns=list("abc")) 152 expected = df[~(df.b > 0)] 153 return_value = df.drop(labels=df[df.b > 0].index, inplace=True) 154 assert return_value is None 155 tm.assert_frame_equal(df, expected) 156 157 def test_drop_multiindex_not_lexsorted(self): 158 # GH#11640 159 160 # define the lexsorted version 161 lexsorted_mi = MultiIndex.from_tuples( 162 [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] 163 ) 164 lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) 165 assert lexsorted_df.columns.is_lexsorted() 166 167 # define the non-lexsorted version 168 not_lexsorted_df = DataFrame( 169 columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] 170 ) 171 not_lexsorted_df = not_lexsorted_df.pivot_table( 172 index="a", columns=["b", "c"], values="d" 173 ) 174 not_lexsorted_df = not_lexsorted_df.reset_index() 175 assert not not_lexsorted_df.columns.is_lexsorted() 176 177 # compare the results 178 tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) 179 180 expected = lexsorted_df.drop("a", axis=1) 181 with tm.assert_produces_warning(PerformanceWarning): 182 result = not_lexsorted_df.drop("a", axis=1) 183 184 tm.assert_frame_equal(result, expected) 185 186 def test_drop_api_equivalence(self): 187 # equivalence of the labels/axis and index/columns API's (GH#12392) 188 df = DataFrame( 189 [[1, 2, 3], [3, 4, 5], [5, 6, 7]], 190 index=["a", "b", "c"], 191 columns=["d", "e", "f"], 192 ) 193 194 res1 = df.drop("a") 195 res2 = df.drop(index="a") 196 tm.assert_frame_equal(res1, res2) 197 198 res1 = df.drop("d", 1) 199 res2 = df.drop(columns="d") 200 tm.assert_frame_equal(res1, res2) 201 202 res1 = df.drop(labels="e", axis=1) 203 res2 = df.drop(columns="e") 204 tm.assert_frame_equal(res1, res2) 205 206 res1 = df.drop(["a"], axis=0) 207 res2 = df.drop(index=["a"]) 208 tm.assert_frame_equal(res1, res2) 209 210 res1 = df.drop(["a"], axis=0).drop(["d"], axis=1) 211 res2 = df.drop(index=["a"], columns=["d"]) 212 tm.assert_frame_equal(res1, res2) 213 214 msg = "Cannot specify both 'labels' and 'index'/'columns'" 215 with pytest.raises(ValueError, match=msg): 216 df.drop(labels="a", index="b") 217 218 with pytest.raises(ValueError, match=msg): 219 df.drop(labels="a", columns="b") 220 221 msg = "Need to specify at least one of 'labels', 'index' or 'columns'" 222 with pytest.raises(ValueError, match=msg): 223 df.drop(axis=1) 224 225 data = [[1, 2, 3], [1, 2, 3]] 226 227 @pytest.mark.parametrize( 228 "actual", 229 [ 230 DataFrame(data=data, index=["a", "a"]), 231 DataFrame(data=data, index=["a", "b"]), 232 DataFrame(data=data, index=["a", "b"]).set_index([0, 1]), 233 DataFrame(data=data, index=["a", "a"]).set_index([0, 1]), 234 ], 235 ) 236 def test_raise_on_drop_duplicate_index(self, actual): 237 238 # GH#19186 239 level = 0 if isinstance(actual.index, MultiIndex) else None 240 msg = re.escape("\"['c'] not found in axis\"") 241 with pytest.raises(KeyError, match=msg): 242 actual.drop("c", level=level, axis=0) 243 with pytest.raises(KeyError, match=msg): 244 actual.T.drop("c", level=level, axis=1) 245 expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore") 246 tm.assert_frame_equal(expected_no_err, actual) 247 expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore") 248 tm.assert_frame_equal(expected_no_err.T, actual) 249 250 @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]]) 251 @pytest.mark.parametrize("drop_labels", [[], [1], [2]]) 252 def test_drop_empty_list(self, index, drop_labels): 253 # GH#21494 254 expected_index = [i for i in index if i not in drop_labels] 255 frame = DataFrame(index=index).drop(drop_labels) 256 tm.assert_frame_equal(frame, DataFrame(index=expected_index)) 257 258 @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]]) 259 @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]]) 260 def test_drop_non_empty_list(self, index, drop_labels): 261 # GH# 21494 262 with pytest.raises(KeyError, match="not found in axis"): 263 DataFrame(index=index).drop(drop_labels) 264 265 def test_mixed_depth_drop(self): 266 arrays = [ 267 ["a", "top", "top", "routine1", "routine1", "routine2"], 268 ["", "OD", "OD", "result1", "result2", "result1"], 269 ["", "wx", "wy", "", "", ""], 270 ] 271 272 tuples = sorted(zip(*arrays)) 273 index = MultiIndex.from_tuples(tuples) 274 df = DataFrame(np.random.randn(4, 6), columns=index) 275 276 result = df.drop("a", axis=1) 277 expected = df.drop([("a", "", "")], axis=1) 278 tm.assert_frame_equal(expected, result) 279 280 result = df.drop(["top"], axis=1) 281 expected = df.drop([("top", "OD", "wx")], axis=1) 282 expected = expected.drop([("top", "OD", "wy")], axis=1) 283 tm.assert_frame_equal(expected, result) 284 285 result = df.drop(("top", "OD", "wx"), axis=1) 286 expected = df.drop([("top", "OD", "wx")], axis=1) 287 tm.assert_frame_equal(expected, result) 288 289 expected = df.drop([("top", "OD", "wy")], axis=1) 290 expected = df.drop("top", axis=1) 291 292 result = df.drop("result1", level=1, axis=1) 293 expected = df.drop( 294 [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1 295 ) 296 tm.assert_frame_equal(expected, result) 297 298 def test_drop_multiindex_other_level_nan(self): 299 # GH#12754 300 df = ( 301 DataFrame( 302 { 303 "A": ["one", "one", "two", "two"], 304 "B": [np.nan, 0.0, 1.0, 2.0], 305 "C": ["a", "b", "c", "c"], 306 "D": [1, 2, 3, 4], 307 } 308 ) 309 .set_index(["A", "B", "C"]) 310 .sort_index() 311 ) 312 result = df.drop("c", level="C") 313 expected = DataFrame( 314 [2, 1], 315 columns=["D"], 316 index=MultiIndex.from_tuples( 317 [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"] 318 ), 319 ) 320 tm.assert_frame_equal(result, expected) 321 322 def test_drop_nonunique(self): 323 df = DataFrame( 324 [ 325 ["x-a", "x", "a", 1.5], 326 ["x-a", "x", "a", 1.2], 327 ["z-c", "z", "c", 3.1], 328 ["x-a", "x", "a", 4.1], 329 ["x-b", "x", "b", 5.1], 330 ["x-b", "x", "b", 4.1], 331 ["x-b", "x", "b", 2.2], 332 ["y-a", "y", "a", 1.2], 333 ["z-b", "z", "b", 2.1], 334 ], 335 columns=["var1", "var2", "var3", "var4"], 336 ) 337 338 grp_size = df.groupby("var1").size() 339 drop_idx = grp_size.loc[grp_size == 1] 340 341 idf = df.set_index(["var1", "var2", "var3"]) 342 343 # it works! GH#2101 344 result = idf.drop(drop_idx.index, level=0).reset_index() 345 expected = df[-df.var1.isin(drop_idx.index)] 346 347 result.index = expected.index 348 349 tm.assert_frame_equal(result, expected) 350 351 def test_drop_level(self): 352 index = MultiIndex( 353 levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], 354 codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], 355 names=["first", "second"], 356 ) 357 frame = DataFrame( 358 np.random.randn(10, 3), 359 index=index, 360 columns=Index(["A", "B", "C"], name="exp"), 361 ) 362 363 result = frame.drop(["bar", "qux"], level="first") 364 expected = frame.iloc[[0, 1, 2, 5, 6]] 365 tm.assert_frame_equal(result, expected) 366 367 result = frame.drop(["two"], level="second") 368 expected = frame.iloc[[0, 2, 3, 6, 7, 9]] 369 tm.assert_frame_equal(result, expected) 370 371 result = frame.T.drop(["bar", "qux"], axis=1, level="first") 372 expected = frame.iloc[[0, 1, 2, 5, 6]].T 373 tm.assert_frame_equal(result, expected) 374 375 result = frame.T.drop(["two"], axis=1, level="second") 376 expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T 377 tm.assert_frame_equal(result, expected) 378 379 def test_drop_level_nonunique_datetime(self): 380 # GH#12701 381 idx = Index([2, 3, 4, 4, 5], name="id") 382 idxdt = pd.to_datetime( 383 [ 384 "201603231400", 385 "201603231500", 386 "201603231600", 387 "201603231600", 388 "201603231700", 389 ] 390 ) 391 df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx) 392 df["tstamp"] = idxdt 393 df = df.set_index("tstamp", append=True) 394 ts = Timestamp("201603231600") 395 assert df.index.is_unique is False 396 397 result = df.drop(ts, level="tstamp") 398 expected = df.loc[idx != 4] 399 tm.assert_frame_equal(result, expected) 400 401 @pytest.mark.parametrize("box", [Series, DataFrame]) 402 def test_drop_tz_aware_timestamp_across_dst(self, box): 403 # GH#21761 404 start = Timestamp("2017-10-29", tz="Europe/Berlin") 405 end = Timestamp("2017-10-29 04:00:00", tz="Europe/Berlin") 406 index = pd.date_range(start, end, freq="15min") 407 data = box(data=[1] * len(index), index=index) 408 result = data.drop(start) 409 expected_start = Timestamp("2017-10-29 00:15:00", tz="Europe/Berlin") 410 expected_idx = pd.date_range(expected_start, end, freq="15min") 411 expected = box(data=[1] * len(expected_idx), index=expected_idx) 412 tm.assert_equal(result, expected) 413 414 def test_drop_preserve_names(self): 415 index = MultiIndex.from_arrays( 416 [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] 417 ) 418 419 df = DataFrame(np.random.randn(6, 3), index=index) 420 421 result = df.drop([(0, 2)]) 422 assert result.index.names == ("one", "two") 423 424 @pytest.mark.parametrize( 425 "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"] 426 ) 427 @pytest.mark.parametrize("inplace", [False, True]) 428 def test_inplace_drop_and_operation(self, operation, inplace): 429 # GH#30484 430 df = DataFrame({"x": range(5)}) 431 expected = df.copy() 432 df["y"] = range(5) 433 y = df["y"] 434 435 with tm.assert_produces_warning(None): 436 if inplace: 437 df.drop("y", axis=1, inplace=inplace) 438 else: 439 df = df.drop("y", axis=1, inplace=inplace) 440 441 # Perform operation and check result 442 getattr(y, operation)(1) 443 tm.assert_frame_equal(df, expected) 444