from copy import deepcopy
import inspect
import pydoc

import numpy as np
import pytest

import pandas.util._test_decorators as td
from pandas.util._test_decorators import async_mark, skip_if_no

import pandas as pd
from pandas import DataFrame, Series, date_range, timedelta_range
import pandas._testing as tm


# Miscellaneous API-level checks for DataFrame (plus a few shared Series paths).
class TestDataFrameMisc:
    def test_getitem_pop_assign_name(self, float_frame):
        # Each way of extracting a single column must carry the column label
        # over as the name of the resulting Series.
        column = float_frame["A"]
        assert column.name == "A"

        column = float_frame.pop("A")
        assert column.name == "A"

        column = float_frame.loc[:, "B"]
        assert column.name == "B"

        full_slice = column.loc[:]
        assert full_slice.name == "B"

    def test_get_axis(self, float_frame):
        frame = float_frame

        # axis spec -> axis number
        number_cases = [(0, 0), (1, 1), ("index", 0), ("rows", 0), ("columns", 1)]
        for axis, number in number_cases:
            assert frame._get_axis_number(axis) == number

        # axis spec -> canonical axis name
        name_cases = [
            (0, "index"),
            (1, "columns"),
            ("index", "index"),
            ("rows", "index"),
            ("columns", "columns"),
        ]
        for axis, name in name_cases:
            assert frame._get_axis_name(axis) == name

        assert frame._get_axis(0) is frame.index
        assert frame._get_axis(1) is frame.columns

        # invalid axis specs are rejected with a ValueError
        invalid_cases = [
            (frame._get_axis_number, 2, "No axis named"),
            (frame._get_axis_name, "foo", "No axis.*foo"),
            (frame._get_axis_name, None, "No axis.*None"),
            (frame._get_axis_number, None, "No axis named"),
        ]
        for lookup, axis, pattern in invalid_cases:
            with pytest.raises(ValueError, match=pattern):
                lookup(axis)

    def test_column_contains_raises(self, float_frame):
        # The membership test itself is what must raise; its result is unused.
        msg = "unhashable type: 'Index'"
        with pytest.raises(TypeError, match=msg):
            float_frame.columns in float_frame

    def test_tab_completion(self):
        rows = [list("abcd"), list("efgh")]

        # DataFrame whose columns are identifiers shall have them in __dir__.
        flat = DataFrame(rows, columns=list("ABCD"))
        flat_names = dir(flat)
        for label in "ABCD":
            assert label in flat_names
        assert isinstance(flat.__getitem__("A"), pd.Series)

        # DataFrame whose first-level columns are identifiers shall have
        # them in __dir__; second-level labels must not show up.
        nested_columns = pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH")))
        nested = DataFrame(rows, columns=nested_columns)
        nested_names = dir(nested)
        for label in "ABCD":
            assert label in nested_names
        for label in "EFGH":
            assert label not in nested_names
        assert isinstance(nested.__getitem__("A"), pd.DataFrame)

    def test_not_hashable(self):
        msg = "'DataFrame' objects are mutable, thus they cannot be hashed"

        # A populated frame first, then an empty one.
        for frame in (DataFrame([1]), DataFrame()):
            with pytest.raises(TypeError, match=msg):
                hash(frame)

    def test_column_name_contains_unicode_surrogate(self):
        # GH 25509
        surrogate = "\ud83d"
        frame = DataFrame({surrogate: []})

        # this should not crash
        assert surrogate not in dir(frame)
        assert frame.columns[0] == surrogate

    def test_new_empty_index(self):
        # Naming the index of one empty frame must not affect another one.
        named = DataFrame(np.random.randn(0, 3))
        untouched = DataFrame(np.random.randn(0, 3))

        named.index.name = "foo"
        assert untouched.index.name is None

    def test_get_agg_axis(self, float_frame):
        assert float_frame._get_agg_axis(0) is float_frame.columns
        assert float_frame._get_agg_axis(1) is float_frame.index

        msg = r"Axis must be 0 or 1 \(got 2\)"
        with pytest.raises(ValueError, match=msg):
            float_frame._get_agg_axis(2)

    def test_empty(self, float_frame, float_string_frame):
        assert DataFrame().empty

        for populated in (float_frame, float_string_frame):
            assert not populated.empty

        # corner case
        mixed = DataFrame(
            {"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]},
            index=np.arange(3),
        )
        del mixed["A"]
        assert not mixed.empty

    def test_len(self, float_frame):
        n_rows = len(float_frame.index)
        assert len(float_frame) == n_rows

        # single block corner case
        wanted = ["A", "B"]
        via_getitem = float_frame[wanted].values
        via_reindex = float_frame.reindex(columns=wanted).values
        tm.assert_almost_equal(via_getitem, via_reindex)

    def test_axis_aliases(self, float_frame):
        frame = float_frame

        # reg name
        for number, alias in ((0, "index"), (1, "columns")):
            expected = frame.sum(axis=number)
            result = frame.sum(axis=alias)
            tm.assert_series_equal(result, expected)

    def test_class_axis(self):
        # GH 18147
        # no exception and no empty docstring
        index_doc = pydoc.getdoc(DataFrame.index)
        assert index_doc
        columns_doc = pydoc.getdoc(DataFrame.columns)
        assert columns_doc

    def test_series_put_names(self, float_string_frame):
        by_label = float_string_frame._series
        for label, column in by_label.items():
            assert column.name == label

    def test_empty_nonzero(self):
        non_empty = [
            DataFrame([1, 2, 3]),
            DataFrame(index=[1], columns=[1]),
        ]
        for frame in non_empty:
            assert not frame.empty

        # Empty along either axis counts as empty, and so does the transpose.
        empty = [
            DataFrame(index=["a", "b"], columns=["c", "d"]).dropna(),
            DataFrame(),
            DataFrame(index=[1]),
            DataFrame(columns=[1]),
            DataFrame({1: []}),
        ]
        for frame in empty:
            assert frame.empty
            assert frame.T.empty

    def test_with_datetimelikes(self):
        stamps = date_range("20130101", periods=10)
        deltas = timedelta_range("1 day", periods=10)
        transposed = DataFrame({"A": stamps, "B": deltas}).T

        result = transposed.dtypes.value_counts()
        expected = Series({np.dtype("object"): 10})
        tm.assert_series_equal(result, expected)

    def test_deepcopy(self, float_frame):
        clone = deepcopy(float_frame)
        clone_col = clone["A"]
        clone_col[:] = 10

        # Writing into the copy must not show up in the source frame.
        for label, written in clone_col.items():
            assert float_frame["A"][label] != written

    def test_inplace_return_self(self):
        # GH 1893

        data = DataFrame(
            {
                "a": ["foo", "bar", "baz", "qux"],
                "b": [0, 0, 1, 1],
                "c": [1, 2, 3, 4],
            }
        )

        def assert_inplace_returns_none(obj, method, *args, **kwargs):
            # Call ``obj.<method>(..., inplace=True)`` and check it returns None.
            outcome = getattr(obj, method)(*args, inplace=True, **kwargs)
            assert outcome is None

        # -----DataFrame-----
        assert_inplace_returns_none(data.copy(), "set_index", "a")
        assert_inplace_returns_none(data.set_index("a"), "reset_index")
        assert_inplace_returns_none(data.copy(), "drop_duplicates")
        assert_inplace_returns_none(data.copy(), "sort_values", "b")
        assert_inplace_returns_none(data.copy(), "sort_index")
        assert_inplace_returns_none(data.copy(), "fillna", 0)
        assert_inplace_returns_none(data.copy(), "replace", 1, 0)
        assert_inplace_returns_none(data.copy(), "rename", {1: "foo"})

        # -----Series-----
        column = data.copy()["c"]

        assert_inplace_returns_none(
            data.set_index("a")["c"], "reset_index", drop=True
        )
        assert_inplace_returns_none(column.copy(), "fillna", 0)
        assert_inplace_returns_none(column.copy(), "replace", 1, 0)
        assert_inplace_returns_none(column.copy(), "rename", {1: "foo"})

    @async_mark()
    @td.check_file_leaks
    async def test_tab_complete_warning(self, ip, frame_or_series):
        # GH 16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = (
            "from pandas import DataFrame; obj = DataFrame()"
            if frame_or_series is DataFrame
            else "from pandas import Series; obj = Series(dtype=object)"
        )

        await ip.run_code(code)

        # GH 31324 newer jedi version raises Deprecation warning;
        #  appears resolved 2021-02-02
        with tm.assert_produces_warning(None), provisionalcompleter("ignore"):
            list(ip.Completer.completions("obj.", 1))

    def test_attrs(self):
        frame = DataFrame({"A": [2, 3]})
        assert frame.attrs == {}

        frame.attrs["version"] = 1

        # attrs must be carried over onto the renamed result
        renamed = frame.rename(columns=str)
        assert renamed.attrs == {"version": 1}

    @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
    def test_set_flags(self, allows_duplicate_labels, frame_or_series):
        frame = DataFrame({"A": [1, 2]})
        if frame_or_series is Series:
            obj, key = frame["A"], 0
        else:
            obj, key = frame, (0, 0)

        result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels)

        # We don't update when it's not provided, so None leaves the flag True.
        if allows_duplicate_labels is None:
            expected_flag = True
        else:
            expected_flag = allows_duplicate_labels
        assert result.flags.allows_duplicate_labels is expected_flag

        # We made a copy
        assert obj is not result

        # We didn't mutate obj
        assert obj.flags.allows_duplicate_labels is True

        # But we didn't copy data
        result.iloc[key] = 0
        assert obj.iloc[key] == 0

        # Now we do copy.
        result = obj.set_flags(
            copy=True, allows_duplicate_labels=allows_duplicate_labels
        )
        result.iloc[key] = 10
        assert obj.iloc[key] == 0

    def test_constructor_expanddim_lookup(self):
        # GH#33628 accessing _constructor_expanddim should not
        #  raise NotImplementedError
        frame = DataFrame()
        cube = np.arange(27).reshape(3, 3, 3)

        msg = "Not supported for DataFrames!"
        with pytest.raises(NotImplementedError, match=msg):
            frame._constructor_expanddim(cube)

    @skip_if_no("jinja2")
    def test_inspect_getmembers(self):
        # GH38740
        frame = DataFrame()
        with tm.assert_produces_warning(None):
            inspect.getmembers(frame)