1import re 2 3import numpy as np 4import pytest 5 6from pandas import DataFrame, Index, IndexSlice, MultiIndex, Series, concat 7import pandas._testing as tm 8import pandas.core.common as com 9 10from pandas.tseries.offsets import BDay 11 12 13@pytest.fixture 14def four_level_index_dataframe(): 15 arr = np.array( 16 [ 17 [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], 18 [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], 19 [-0.6662, -0.5243, -0.358, 0.89145, 2.5838], 20 ] 21 ) 22 index = MultiIndex( 23 levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]], 24 codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]], 25 names=["one", "two", "three", "four"], 26 ) 27 return DataFrame(arr, index=index, columns=list("ABCDE")) 28 29 30class TestXS: 31 def test_xs(self, float_frame, datetime_frame): 32 idx = float_frame.index[5] 33 xs = float_frame.xs(idx) 34 for item, value in xs.items(): 35 if np.isnan(value): 36 assert np.isnan(float_frame[item][idx]) 37 else: 38 assert value == float_frame[item][idx] 39 40 # mixed-type xs 41 test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} 42 frame = DataFrame(test_data) 43 xs = frame.xs("1") 44 assert xs.dtype == np.object_ 45 assert xs["A"] == 1 46 assert xs["B"] == "1" 47 48 with pytest.raises( 49 KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") 50 ): 51 datetime_frame.xs(datetime_frame.index[0] - BDay()) 52 53 # xs get column 54 series = float_frame.xs("A", axis=1) 55 expected = float_frame["A"] 56 tm.assert_series_equal(series, expected) 57 58 # view is returned if possible 59 series = float_frame.xs("A", axis=1) 60 series[:] = 5 61 assert (expected == 5).all() 62 63 def test_xs_corner(self): 64 # pathological mixed-type reordering case 65 df = DataFrame(index=[0]) 66 df["A"] = 1.0 67 df["B"] = "foo" 68 df["C"] = 2.0 69 df["D"] = "bar" 70 df["E"] = 3.0 71 72 xs = df.xs(0) 73 exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0) 74 tm.assert_series_equal(xs, exp) 75 76 # no columns but Index(dtype=object) 77 df = DataFrame(index=["a", "b", "c"]) 78 result = df.xs("a") 79 expected = Series([], name="a", index=Index([]), dtype=np.float64) 80 tm.assert_series_equal(result, expected) 81 82 def test_xs_duplicates(self): 83 df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) 84 85 cross = df.xs("c") 86 exp = df.iloc[2] 87 tm.assert_series_equal(cross, exp) 88 89 def test_xs_keep_level(self): 90 df = DataFrame( 91 { 92 "day": {0: "sat", 1: "sun"}, 93 "flavour": {0: "strawberry", 1: "strawberry"}, 94 "sales": {0: 10, 1: 12}, 95 "year": {0: 2008, 1: 2008}, 96 } 97 ).set_index(["year", "flavour", "day"]) 98 result = df.xs("sat", level="day", drop_level=False) 99 expected = df[:1] 100 tm.assert_frame_equal(result, expected) 101 102 result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) 103 tm.assert_frame_equal(result, expected) 104 105 def test_xs_view(self): 106 # in 0.14 this will return a view if possible a copy otherwise, but 107 # this is numpy dependent 108 109 dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) 110 111 dm.xs(2)[:] = 10 112 assert (dm.xs(2) == 10).all() 113 114 115class TestXSWithMultiIndex: 116 def test_xs_integer_key(self): 117 # see GH#2107 118 dates = range(20111201, 20111205) 119 ids = list("abcde") 120 index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) 121 df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) 122 123 result = df.xs(20111201, level="date") 124 expected = df.loc[20111201, :] 125 tm.assert_frame_equal(result, expected) 126 127 def test_xs_level(self, multiindex_dataframe_random_data): 128 df = multiindex_dataframe_random_data 129 result = df.xs("two", level="second") 130 expected = df[df.index.get_level_values(1) == "two"] 131 expected.index = Index(["foo", "bar", "baz", "qux"], name="first") 132 tm.assert_frame_equal(result, expected) 133 134 def test_xs_level_eq_2(self): 135 arr = np.random.randn(3, 5) 136 index = MultiIndex( 137 levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], 138 codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], 139 ) 140 df = DataFrame(arr, index=index) 141 expected = DataFrame(arr[1:2], index=[["a"], ["b"]]) 142 result = df.xs("c", level=2) 143 tm.assert_frame_equal(result, expected) 144 145 def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data): 146 # this is a copy in 0.14 147 df = multiindex_dataframe_random_data 148 result = df.xs("two", level="second") 149 150 # setting this will give a SettingWithCopyError 151 # as we are trying to write a view 152 msg = "A value is trying to be set on a copy of a slice from a DataFrame" 153 with pytest.raises(com.SettingWithCopyError, match=msg): 154 result[:] = 10 155 156 def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe): 157 # this is a copy in 0.14 158 df = four_level_index_dataframe 159 result = df.xs(("a", 4), level=["one", "four"]) 160 161 # setting this will give a SettingWithCopyError 162 # as we are trying to write a view 163 msg = "A value is trying to be set on a copy of a slice from a DataFrame" 164 with pytest.raises(com.SettingWithCopyError, match=msg): 165 result[:] = 10 166 167 @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) 168 def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data): 169 # see GH#13719 170 frame = multiindex_dataframe_random_data 171 df = concat([frame] * 2) 172 assert df.index.is_unique is False 173 expected = concat([frame.xs("one", level="second")] * 2) 174 175 result = df.xs(key, level=level) 176 tm.assert_frame_equal(result, expected) 177 178 def test_xs_missing_values_in_index(self): 179 # see GH#6574 180 # missing values in returned index should be preserved 181 acc = [ 182 ("a", "abcde", 1), 183 ("b", "bbcde", 2), 184 ("y", "yzcde", 25), 185 ("z", "xbcde", 24), 186 ("z", None, 26), 187 ("z", "zbcde", 25), 188 ("z", "ybcde", 26), 189 ] 190 df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"]) 191 expected = DataFrame( 192 {"cnt": [24, 26, 25, 26]}, 193 index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"), 194 ) 195 196 result = df.xs("z", level="a1") 197 tm.assert_frame_equal(result, expected) 198 199 @pytest.mark.parametrize( 200 "key, level, exp_arr, exp_index", 201 [ 202 ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")), 203 ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")), 204 ], 205 ) 206 def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): 207 # see GH#2903 208 arr = np.random.randn(4, 4) 209 index = MultiIndex( 210 levels=[["a", "b"], ["bar", "foo", "hello", "world"]], 211 codes=[[0, 0, 1, 1], [0, 1, 2, 3]], 212 names=["lvl0", "lvl1"], 213 ) 214 df = DataFrame(arr, columns=index) 215 result = df.xs(key, level=level, axis=1) 216 expected = DataFrame(exp_arr(arr), columns=exp_index) 217 tm.assert_frame_equal(result, expected) 218 219 @pytest.mark.parametrize( 220 "indexer", 221 [ 222 lambda df: df.xs(("a", 4), level=["one", "four"]), 223 lambda df: df.xs("a").xs(4, level="four"), 224 ], 225 ) 226 def test_xs_level_multiple(self, indexer, four_level_index_dataframe): 227 df = four_level_index_dataframe 228 expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]] 229 expected_index = MultiIndex( 230 levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"] 231 ) 232 expected = DataFrame( 233 expected_values, index=expected_index, columns=list("ABCDE") 234 ) 235 result = indexer(df) 236 tm.assert_frame_equal(result, expected) 237 238 @pytest.mark.parametrize( 239 "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")] 240 ) 241 def test_xs_level0(self, indexer, four_level_index_dataframe): 242 df = four_level_index_dataframe 243 expected_values = [ 244 [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], 245 [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], 246 ] 247 expected_index = MultiIndex( 248 levels=[["b", "q"], [10.0032, 20.0], [4, 5]], 249 codes=[[0, 1], [0, 1], [1, 0]], 250 names=["two", "three", "four"], 251 ) 252 expected = DataFrame( 253 expected_values, index=expected_index, columns=list("ABCDE") 254 ) 255 256 result = indexer(df) 257 tm.assert_frame_equal(result, expected) 258 259 def test_xs_values(self, multiindex_dataframe_random_data): 260 df = multiindex_dataframe_random_data 261 result = df.xs(("bar", "two")).values 262 expected = df.values[4] 263 tm.assert_almost_equal(result, expected) 264 265 def test_xs_loc_equality(self, multiindex_dataframe_random_data): 266 df = multiindex_dataframe_random_data 267 result = df.xs(("bar", "two")) 268 expected = df.loc[("bar", "two")] 269 tm.assert_series_equal(result, expected) 270 271 @pytest.mark.parametrize("klass", [DataFrame, Series]) 272 def test_xs_IndexSlice_argument_not_implemented(self, klass): 273 # GH#35301 274 275 index = MultiIndex( 276 levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], 277 codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], 278 ) 279 280 obj = DataFrame(np.random.randn(6, 4), index=index) 281 if klass is Series: 282 obj = obj[0] 283 284 msg = ( 285 "Expected label or tuple of labels, got " 286 r"\(\('foo', 'qux', 0\), slice\(None, None, None\)\)" 287 ) 288 with pytest.raises(TypeError, match=msg): 289 obj.xs(IndexSlice[("foo", "qux", 0), :]) 290 291 @pytest.mark.parametrize("klass", [DataFrame, Series]) 292 def test_xs_levels_raises(self, klass): 293 obj = DataFrame({"A": [1, 2, 3]}) 294 if klass is Series: 295 obj = obj["A"] 296 297 msg = "Index must be a MultiIndex" 298 with pytest.raises(TypeError, match=msg): 299 obj.xs(0, level="as") 300 301 def test_xs_multiindex_droplevel_false(self): 302 # GH#19056 303 mi = MultiIndex.from_tuples( 304 [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"] 305 ) 306 df = DataFrame([[1, 2, 3]], columns=mi) 307 result = df.xs("a", axis=1, drop_level=False) 308 expected = DataFrame( 309 [[1, 2]], 310 columns=MultiIndex.from_tuples( 311 [("a", "x"), ("a", "y")], names=["level1", "level2"] 312 ), 313 ) 314 tm.assert_frame_equal(result, expected) 315 316 def test_xs_droplevel_false(self): 317 # GH#19056 318 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) 319 result = df.xs("a", axis=1, drop_level=False) 320 expected = DataFrame({"a": [1]}) 321 tm.assert_frame_equal(result, expected) 322 323 def test_xs_droplevel_false_view(self): 324 # GH#37832 325 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) 326 result = df.xs("a", axis=1, drop_level=False) 327 df.values[0, 0] = 2 328 expected = DataFrame({"a": [2]}) 329 tm.assert_frame_equal(result, expected) 330