1from collections import OrderedDict, abc 2from datetime import date, datetime, timedelta 3import functools 4import itertools 5import re 6 7import numpy as np 8import numpy.ma as ma 9import numpy.ma.mrecords as mrecords 10import pytest 11import pytz 12 13from pandas.compat import is_platform_little_endian 14from pandas.compat.numpy import _np_version_under1p19 15 16from pandas.core.dtypes.common import is_integer_dtype 17from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype 18 19import pandas as pd 20from pandas import ( 21 Categorical, 22 CategoricalIndex, 23 DataFrame, 24 Index, 25 Interval, 26 MultiIndex, 27 Period, 28 RangeIndex, 29 Series, 30 Timedelta, 31 Timestamp, 32 date_range, 33 isna, 34) 35import pandas._testing as tm 36from pandas.arrays import IntervalArray, PeriodArray, SparseArray 37from pandas.core.construction import create_series_with_explicit_dtype 38 39MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] 40MIXED_INT_DTYPES = [ 41 "uint8", 42 "uint16", 43 "uint32", 44 "uint64", 45 "int8", 46 "int16", 47 "int32", 48 "int64", 49] 50 51 52class TestDataFrameConstructors: 53 def test_series_with_name_not_matching_column(self): 54 # GH#9232 55 x = Series(range(5), name=1) 56 y = Series(range(5), name=0) 57 58 result = DataFrame(x, columns=[0]) 59 expected = DataFrame([], columns=[0]) 60 tm.assert_frame_equal(result, expected) 61 62 result = DataFrame(y, columns=[1]) 63 expected = DataFrame([], columns=[1]) 64 tm.assert_frame_equal(result, expected) 65 66 @pytest.mark.parametrize( 67 "constructor", 68 [ 69 lambda: DataFrame(), 70 lambda: DataFrame(None), 71 lambda: DataFrame({}), 72 lambda: DataFrame(()), 73 lambda: DataFrame([]), 74 lambda: DataFrame(_ for _ in []), 75 lambda: DataFrame(range(0)), 76 lambda: DataFrame(data=None), 77 lambda: DataFrame(data={}), 78 lambda: DataFrame(data=()), 79 lambda: DataFrame(data=[]), 80 lambda: DataFrame(data=(_ for _ in [])), 81 lambda: DataFrame(data=range(0)), 82 ], 83 ) 84 def test_empty_constructor(self, constructor): 85 expected = DataFrame() 86 result = constructor() 87 assert len(result.index) == 0 88 assert len(result.columns) == 0 89 tm.assert_frame_equal(result, expected) 90 91 @pytest.mark.parametrize( 92 "emptylike,expected_index,expected_columns", 93 [ 94 ([[]], RangeIndex(1), RangeIndex(0)), 95 ([[], []], RangeIndex(2), RangeIndex(0)), 96 ([(_ for _ in [])], RangeIndex(1), RangeIndex(0)), 97 ], 98 ) 99 def test_emptylike_constructor(self, emptylike, expected_index, expected_columns): 100 expected = DataFrame(index=expected_index, columns=expected_columns) 101 result = DataFrame(emptylike) 102 tm.assert_frame_equal(result, expected) 103 104 def test_constructor_mixed(self, float_string_frame): 105 index, data = tm.getMixedTypeDict() 106 107 # TODO(wesm), incomplete test? 108 indexed_frame = DataFrame(data, index=index) # noqa 109 unindexed_frame = DataFrame(data) # noqa 110 111 assert float_string_frame["foo"].dtype == np.object_ 112 113 def test_constructor_cast_failure(self): 114 foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) 115 assert foo["a"].dtype == object 116 117 # GH 3010, constructing with odd arrays 118 df = DataFrame(np.ones((4, 2))) 119 120 # this is ok 121 df["foo"] = np.ones((4, 2)).tolist() 122 123 # this is not ok 124 msg = "Wrong number of items passed 2, placement implies 1" 125 with pytest.raises(ValueError, match=msg): 126 df["test"] = np.ones((4, 2)) 127 128 # this is ok 129 df["foo2"] = np.ones((4, 2)).tolist() 130 131 def test_constructor_dtype_copy(self): 132 orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]}) 133 134 new_df = DataFrame(orig_df, dtype=float, copy=True) 135 136 new_df["col1"] = 200.0 137 assert orig_df["col1"][0] == 1.0 138 139 def test_constructor_dtype_nocast_view(self): 140 df = DataFrame([[1, 2]]) 141 should_be_view = DataFrame(df, dtype=df[0].dtype) 142 should_be_view[0][0] = 99 143 assert df.values[0, 0] == 99 144 145 should_be_view = DataFrame(df.values, dtype=df[0].dtype) 146 should_be_view[0][0] = 97 147 assert df.values[0, 0] == 97 148 149 def test_constructor_dtype_list_data(self): 150 df = DataFrame([[1, "2"], [None, "a"]], dtype=object) 151 assert df.loc[1, 0] is None 152 assert df.loc[0, 1] == "2" 153 154 @pytest.mark.skipif(_np_version_under1p19, reason="NumPy change.") 155 def test_constructor_list_of_2d_raises(self): 156 # https://github.com/pandas-dev/pandas/issues/32289 157 a = DataFrame() 158 b = np.empty((0, 0)) 159 with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): 160 DataFrame([a]) 161 162 with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"): 163 DataFrame([b]) 164 165 a = DataFrame({"A": [1, 2]}) 166 with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"): 167 DataFrame([a, a]) 168 169 def test_constructor_mixed_dtypes(self): 170 def _make_mixed_dtypes_df(typ, ad=None): 171 172 if typ == "int": 173 dtypes = MIXED_INT_DTYPES 174 arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] 175 elif typ == "float": 176 dtypes = MIXED_FLOAT_DTYPES 177 arrays = [ 178 np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes 179 ] 180 181 for d, a in zip(dtypes, arrays): 182 assert a.dtype == d 183 if ad is None: 184 ad = {} 185 ad.update({d: a for d, a in zip(dtypes, arrays)}) 186 return DataFrame(ad) 187 188 def _check_mixed_dtypes(df, dtypes=None): 189 if dtypes is None: 190 dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES 191 for d in dtypes: 192 if d in df: 193 assert df.dtypes[d] == d 194 195 # mixed floating and integer coexist in the same frame 196 df = _make_mixed_dtypes_df("float") 197 _check_mixed_dtypes(df) 198 199 # add lots of types 200 df = _make_mixed_dtypes_df("float", {"A": 1, "B": "foo", "C": "bar"}) 201 _check_mixed_dtypes(df) 202 203 # GH 622 204 df = _make_mixed_dtypes_df("int") 205 _check_mixed_dtypes(df) 206 207 def test_constructor_complex_dtypes(self): 208 # GH10952 209 a = np.random.rand(10).astype(np.complex64) 210 b = np.random.rand(10).astype(np.complex128) 211 212 df = DataFrame({"a": a, "b": b}) 213 assert a.dtype == df.a.dtype 214 assert b.dtype == df.b.dtype 215 216 def test_constructor_dtype_str_na_values(self, string_dtype): 217 # https://github.com/pandas-dev/pandas/issues/21083 218 df = DataFrame({"A": ["x", None]}, dtype=string_dtype) 219 result = df.isna() 220 expected = DataFrame({"A": [False, True]}) 221 tm.assert_frame_equal(result, expected) 222 assert df.iloc[1, 0] is None 223 224 df = DataFrame({"A": ["x", np.nan]}, dtype=string_dtype) 225 assert np.isnan(df.iloc[1, 0]) 226 227 def test_constructor_rec(self, float_frame): 228 rec = float_frame.to_records(index=False) 229 rec.dtype.names = list(rec.dtype.names)[::-1] 230 231 index = float_frame.index 232 233 df = DataFrame(rec) 234 tm.assert_index_equal(df.columns, Index(rec.dtype.names)) 235 236 df2 = DataFrame(rec, index=index) 237 tm.assert_index_equal(df2.columns, Index(rec.dtype.names)) 238 tm.assert_index_equal(df2.index, index) 239 240 rng = np.arange(len(rec))[::-1] 241 df3 = DataFrame(rec, index=rng, columns=["C", "B"]) 242 expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"]) 243 tm.assert_frame_equal(df3, expected) 244 245 def test_constructor_bool(self): 246 df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)}) 247 assert df.values.dtype == np.bool_ 248 249 def test_constructor_overflow_int64(self): 250 # see gh-14881 251 values = np.array([2 ** 64 - i for i in range(1, 10)], dtype=np.uint64) 252 253 result = DataFrame({"a": values}) 254 assert result["a"].dtype == np.uint64 255 256 # see gh-2355 257 data_scores = [ 258 (6311132704823138710, 273), 259 (2685045978526272070, 23), 260 (8921811264899370420, 45), 261 (17019687244989530680, 270), 262 (9930107427299601010, 273), 263 ] 264 dtype = [("uid", "u8"), ("score", "u8")] 265 data = np.zeros((len(data_scores),), dtype=dtype) 266 data[:] = data_scores 267 df_crawls = DataFrame(data) 268 assert df_crawls["uid"].dtype == np.uint64 269 270 @pytest.mark.parametrize( 271 "values", 272 [ 273 np.array([2 ** 64], dtype=object), 274 np.array([2 ** 65]), 275 [2 ** 64 + 1], 276 np.array([-(2 ** 63) - 4], dtype=object), 277 np.array([-(2 ** 64) - 1]), 278 [-(2 ** 65) - 2], 279 ], 280 ) 281 def test_constructor_int_overflow(self, values): 282 # see gh-18584 283 value = values[0] 284 result = DataFrame(values) 285 286 assert result[0].dtype == object 287 assert result[0][0] == value 288 289 def test_constructor_ordereddict(self): 290 import random 291 292 nitems = 100 293 nums = list(range(nitems)) 294 random.shuffle(nums) 295 expected = [f"A{i:d}" for i in nums] 296 df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) 297 assert expected == list(df.columns) 298 299 def test_constructor_dict(self): 300 datetime_series = tm.makeTimeSeries(nper=30) 301 # test expects index shifted by 5 302 datetime_series_short = tm.makeTimeSeries(nper=30)[5:] 303 304 frame = DataFrame({"col1": datetime_series, "col2": datetime_series_short}) 305 306 # col2 is padded with NaN 307 assert len(datetime_series) == 30 308 assert len(datetime_series_short) == 25 309 310 tm.assert_series_equal(frame["col1"], datetime_series.rename("col1")) 311 312 exp = Series( 313 np.concatenate([[np.nan] * 5, datetime_series_short.values]), 314 index=datetime_series.index, 315 name="col2", 316 ) 317 tm.assert_series_equal(exp, frame["col2"]) 318 319 frame = DataFrame( 320 {"col1": datetime_series, "col2": datetime_series_short}, 321 columns=["col2", "col3", "col4"], 322 ) 323 324 assert len(frame) == len(datetime_series_short) 325 assert "col1" not in frame 326 assert isna(frame["col3"]).all() 327 328 # Corner cases 329 assert len(DataFrame()) == 0 330 331 # mix dict and array, wrong size - no spec for which error should raise 332 # first 333 msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 334 with pytest.raises(ValueError, match=msg): 335 DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) 336 337 # Length-one dict micro-optimization 338 frame = DataFrame({"A": {"1": 1, "2": 2}}) 339 tm.assert_index_equal(frame.index, Index(["1", "2"])) 340 341 # empty dict plus index 342 idx = Index([0, 1, 2]) 343 frame = DataFrame({}, index=idx) 344 assert frame.index is idx 345 346 # empty dict with index and columns 347 idx = Index([0, 1, 2]) 348 frame = DataFrame({}, index=idx, columns=idx) 349 assert frame.index is idx 350 assert frame.columns is idx 351 assert len(frame._series) == 3 352 353 # with dict of empty list and Series 354 frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) 355 tm.assert_index_equal(frame.index, RangeIndex(0), exact=True) 356 357 # GH 14381 358 # Dict with None value 359 frame_none = DataFrame({"a": None}, index=[0]) 360 frame_none_list = DataFrame({"a": [None]}, index=[0]) 361 assert frame_none._get_value(0, "a") is None 362 assert frame_none_list._get_value(0, "a") is None 363 tm.assert_frame_equal(frame_none, frame_none_list) 364 365 # GH10856 366 # dict with scalar values should raise error, even if columns passed 367 msg = "If using all scalar values, you must pass an index" 368 with pytest.raises(ValueError, match=msg): 369 DataFrame({"a": 0.7}) 370 371 with pytest.raises(ValueError, match=msg): 372 DataFrame({"a": 0.7}, columns=["a"]) 373 374 @pytest.mark.parametrize("scalar", [2, np.nan, None, "D"]) 375 def test_constructor_invalid_items_unused(self, scalar): 376 # No error if invalid (scalar) value is in fact not used: 377 result = DataFrame({"a": scalar}, columns=["b"]) 378 expected = DataFrame(columns=["b"]) 379 tm.assert_frame_equal(result, expected) 380 381 @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) 382 def test_constructor_dict_nan_key(self, value): 383 # GH 18455 384 cols = [1, value, 3] 385 idx = ["a", value] 386 values = [[0, 3], [1, 4], [2, 5]] 387 data = {cols[c]: Series(values[c], index=idx) for c in range(3)} 388 result = DataFrame(data).sort_values(1).sort_values("a", axis=1) 389 expected = DataFrame( 390 np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols 391 ) 392 tm.assert_frame_equal(result, expected) 393 394 result = DataFrame(data, index=idx).sort_values("a", axis=1) 395 tm.assert_frame_equal(result, expected) 396 397 result = DataFrame(data, index=idx, columns=cols) 398 tm.assert_frame_equal(result, expected) 399 400 @pytest.mark.parametrize("value", [np.nan, None, float("nan")]) 401 def test_constructor_dict_nan_tuple_key(self, value): 402 # GH 18455 403 cols = Index([(11, 21), (value, 22), (13, value)]) 404 idx = Index([("a", value), (value, 2)]) 405 values = [[0, 3], [1, 4], [2, 5]] 406 data = {cols[c]: Series(values[c], index=idx) for c in range(3)} 407 result = DataFrame(data).sort_values((11, 21)).sort_values(("a", value), axis=1) 408 expected = DataFrame( 409 np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols 410 ) 411 tm.assert_frame_equal(result, expected) 412 413 result = DataFrame(data, index=idx).sort_values(("a", value), axis=1) 414 tm.assert_frame_equal(result, expected) 415 416 result = DataFrame(data, index=idx, columns=cols) 417 tm.assert_frame_equal(result, expected) 418 419 def test_constructor_dict_order_insertion(self): 420 datetime_series = tm.makeTimeSeries(nper=30) 421 datetime_series_short = tm.makeTimeSeries(nper=25) 422 423 # GH19018 424 # initialization ordering: by insertion order if python>= 3.6 425 d = {"b": datetime_series_short, "a": datetime_series} 426 frame = DataFrame(data=d) 427 expected = DataFrame(data=d, columns=list("ba")) 428 tm.assert_frame_equal(frame, expected) 429 430 def test_constructor_dict_nan_key_and_columns(self): 431 # GH 16894 432 result = DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) 433 expected = DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) 434 tm.assert_frame_equal(result, expected) 435 436 def test_constructor_multi_index(self): 437 # GH 4078 438 # construction error with mi and all-nan frame 439 tuples = [(2, 3), (3, 3), (3, 3)] 440 mi = MultiIndex.from_tuples(tuples) 441 df = DataFrame(index=mi, columns=mi) 442 assert isna(df).values.ravel().all() 443 444 tuples = [(3, 3), (2, 3), (3, 3)] 445 mi = MultiIndex.from_tuples(tuples) 446 df = DataFrame(index=mi, columns=mi) 447 assert isna(df).values.ravel().all() 448 449 def test_constructor_2d_index(self): 450 # GH 25416 451 # handling of 2d index in construction 452 df = DataFrame([[1]], columns=[[1]], index=[1, 2]) 453 expected = DataFrame( 454 [1, 1], 455 index=pd.Int64Index([1, 2], dtype="int64"), 456 columns=MultiIndex(levels=[[1]], codes=[[0]]), 457 ) 458 tm.assert_frame_equal(df, expected) 459 460 df = DataFrame([[1]], columns=[[1]], index=[[1, 2]]) 461 expected = DataFrame( 462 [1, 1], 463 index=MultiIndex(levels=[[1, 2]], codes=[[0, 1]]), 464 columns=MultiIndex(levels=[[1]], codes=[[0]]), 465 ) 466 tm.assert_frame_equal(df, expected) 467 468 def test_constructor_error_msgs(self): 469 msg = "Empty data passed with indices specified." 470 # passing an empty array with columns specified. 471 with pytest.raises(ValueError, match=msg): 472 DataFrame(np.empty(0), columns=list("abc")) 473 474 msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 475 # mix dict and array, wrong size 476 with pytest.raises(ValueError, match=msg): 477 DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) 478 479 # wrong size ndarray, GH 3105 480 msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)" 481 with pytest.raises(ValueError, match=msg): 482 DataFrame( 483 np.arange(12).reshape((4, 3)), 484 columns=["foo", "bar", "baz"], 485 index=date_range("2000-01-01", periods=3), 486 ) 487 488 arr = np.array([[4, 5, 6]]) 489 msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)" 490 with pytest.raises(ValueError, match=msg): 491 DataFrame(index=[0], columns=range(0, 4), data=arr) 492 493 arr = np.array([4, 5, 6]) 494 msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)" 495 with pytest.raises(ValueError, match=msg): 496 DataFrame(index=[0], columns=range(0, 4), data=arr) 497 498 # higher dim raise exception 499 with pytest.raises(ValueError, match="Must pass 2-d input"): 500 DataFrame(np.zeros((3, 3, 3)), columns=["A", "B", "C"], index=[1]) 501 502 # wrong size axis labels 503 msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" 504 with pytest.raises(ValueError, match=msg): 505 DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1]) 506 507 msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" 508 with pytest.raises(ValueError, match=msg): 509 DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2]) 510 511 # gh-26429 512 msg = "2 columns passed, passed data had 10 columns" 513 with pytest.raises(ValueError, match=msg): 514 DataFrame((range(10), range(10, 20)), columns=("ones", "twos")) 515 516 msg = "If using all scalar values, you must pass an index" 517 with pytest.raises(ValueError, match=msg): 518 DataFrame({"a": False, "b": True}) 519 520 def test_constructor_subclass_dict(self, float_frame, dict_subclass): 521 # Test for passing dict subclass to constructor 522 data = { 523 "col1": dict_subclass((x, 10.0 * x) for x in range(10)), 524 "col2": dict_subclass((x, 20.0 * x) for x in range(10)), 525 } 526 df = DataFrame(data) 527 refdf = DataFrame({col: dict(val.items()) for col, val in data.items()}) 528 tm.assert_frame_equal(refdf, df) 529 530 data = dict_subclass(data.items()) 531 df = DataFrame(data) 532 tm.assert_frame_equal(refdf, df) 533 534 # try with defaultdict 535 from collections import defaultdict 536 537 data = {} 538 float_frame["B"][:10] = np.nan 539 for k, v in float_frame.items(): 540 dct = defaultdict(dict) 541 dct.update(v.to_dict()) 542 data[k] = dct 543 frame = DataFrame(data) 544 expected = frame.reindex(index=float_frame.index) 545 tm.assert_frame_equal(float_frame, expected) 546 547 def test_constructor_dict_block(self): 548 expected = np.array([[4.0, 3.0, 2.0, 1.0]]) 549 df = DataFrame( 550 {"d": [4.0], "c": [3.0], "b": [2.0], "a": [1.0]}, 551 columns=["d", "c", "b", "a"], 552 ) 553 tm.assert_numpy_array_equal(df.values, expected) 554 555 def test_constructor_dict_cast(self): 556 # cast float tests 557 test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} 558 frame = DataFrame(test_data, dtype=float) 559 assert len(frame) == 3 560 assert frame["B"].dtype == np.float64 561 assert frame["A"].dtype == np.float64 562 563 frame = DataFrame(test_data) 564 assert len(frame) == 3 565 assert frame["B"].dtype == np.object_ 566 assert frame["A"].dtype == np.float64 567 568 # can't cast to float 569 test_data = { 570 "A": dict(zip(range(20), tm.makeStringIndex(20))), 571 "B": dict(zip(range(15), np.random.randn(15))), 572 } 573 frame = DataFrame(test_data, dtype=float) 574 assert len(frame) == 20 575 assert frame["A"].dtype == np.object_ 576 assert frame["B"].dtype == np.float64 577 578 def test_constructor_dict_dont_upcast(self): 579 d = {"Col1": {"Row1": "A String", "Row2": np.nan}} 580 df = DataFrame(d) 581 assert isinstance(df["Col1"]["Row2"], float) 582 583 dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) 584 assert isinstance(dm[1][1], int) 585 586 def test_constructor_dict_of_tuples(self): 587 # GH #1491 588 data = {"a": (1, 2, 3), "b": (4, 5, 6)} 589 590 result = DataFrame(data) 591 expected = DataFrame({k: list(v) for k, v in data.items()}) 592 tm.assert_frame_equal(result, expected, check_dtype=False) 593 594 def test_constructor_dict_of_ranges(self): 595 # GH 26356 596 data = {"a": range(3), "b": range(3, 6)} 597 598 result = DataFrame(data) 599 expected = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) 600 tm.assert_frame_equal(result, expected) 601 602 def test_constructor_dict_of_iterators(self): 603 # GH 26349 604 data = {"a": iter(range(3)), "b": reversed(range(3))} 605 606 result = DataFrame(data) 607 expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) 608 tm.assert_frame_equal(result, expected) 609 610 def test_constructor_dict_of_generators(self): 611 # GH 26349 612 data = {"a": (i for i in (range(3))), "b": (i for i in reversed(range(3)))} 613 result = DataFrame(data) 614 expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) 615 tm.assert_frame_equal(result, expected) 616 617 def test_constructor_dict_multiindex(self): 618 def check(result, expected): 619 return tm.assert_frame_equal( 620 result, 621 expected, 622 check_dtype=True, 623 check_index_type=True, 624 check_column_type=True, 625 check_names=True, 626 ) 627 628 d = { 629 ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2}, 630 ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4}, 631 ("b", "c"): {("i", "i"): 7, ("i", "j"): 8, ("j", "i"): 9}, 632 } 633 _d = sorted(d.items()) 634 df = DataFrame(d) 635 expected = DataFrame( 636 [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) 637 ).T 638 expected.index = MultiIndex.from_tuples(expected.index) 639 check(df, expected) 640 641 d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111} 642 _d.insert(0, ("z", d["z"])) 643 expected = DataFrame( 644 [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False) 645 ).T 646 expected.index = Index(expected.index, tupleize_cols=False) 647 df = DataFrame(d) 648 df = df.reindex(columns=expected.columns, index=expected.index) 649 check(df, expected) 650 651 def test_constructor_dict_datetime64_index(self): 652 # GH 10160 653 dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"] 654 655 def create_data(constructor): 656 return {i: {constructor(s): 2 * i} for i, s in enumerate(dates_as_str)} 657 658 data_datetime64 = create_data(np.datetime64) 659 data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d")) 660 data_Timestamp = create_data(Timestamp) 661 662 expected = DataFrame( 663 [ 664 {0: 0, 1: None, 2: None, 3: None}, 665 {0: None, 1: 2, 2: None, 3: None}, 666 {0: None, 1: None, 2: 4, 3: None}, 667 {0: None, 1: None, 2: None, 3: 6}, 668 ], 669 index=[Timestamp(dt) for dt in dates_as_str], 670 ) 671 672 result_datetime64 = DataFrame(data_datetime64) 673 result_datetime = DataFrame(data_datetime) 674 result_Timestamp = DataFrame(data_Timestamp) 675 tm.assert_frame_equal(result_datetime64, expected) 676 tm.assert_frame_equal(result_datetime, expected) 677 tm.assert_frame_equal(result_Timestamp, expected) 678 679 def test_constructor_dict_timedelta64_index(self): 680 # GH 10160 681 td_as_int = [1, 2, 3, 4] 682 683 def create_data(constructor): 684 return {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)} 685 686 data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D")) 687 data_timedelta = create_data(lambda x: timedelta(days=x)) 688 data_Timedelta = create_data(lambda x: Timedelta(x, "D")) 689 690 expected = DataFrame( 691 [ 692 {0: 0, 1: None, 2: None, 3: None}, 693 {0: None, 1: 2, 2: None, 3: None}, 694 {0: None, 1: None, 2: 4, 3: None}, 695 {0: None, 1: None, 2: None, 3: 6}, 696 ], 697 index=[Timedelta(td, "D") for td in td_as_int], 698 ) 699 700 result_timedelta64 = DataFrame(data_timedelta64) 701 result_timedelta = DataFrame(data_timedelta) 702 result_Timedelta = DataFrame(data_Timedelta) 703 tm.assert_frame_equal(result_timedelta64, expected) 704 tm.assert_frame_equal(result_timedelta, expected) 705 tm.assert_frame_equal(result_Timedelta, expected) 706 707 def test_constructor_period_dict(self): 708 # PeriodIndex 709 a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M") 710 b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D") 711 df = DataFrame({"a": a, "b": b}) 712 assert df["a"].dtype == a.dtype 713 assert df["b"].dtype == b.dtype 714 715 # list of periods 716 df = DataFrame({"a": a.astype(object).tolist(), "b": b.astype(object).tolist()}) 717 assert df["a"].dtype == a.dtype 718 assert df["b"].dtype == b.dtype 719 720 def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): 721 ea_scalar, ea_dtype = ea_scalar_and_dtype 722 df = DataFrame({"a": ea_scalar}, index=[0]) 723 assert df["a"].dtype == ea_dtype 724 725 expected = DataFrame(index=[0], columns=["a"], data=ea_scalar) 726 727 tm.assert_frame_equal(df, expected) 728 729 @pytest.mark.parametrize( 730 "data,dtype", 731 [ 732 (Period("2020-01"), PeriodDtype("M")), 733 (Interval(left=0, right=5), IntervalDtype("int64")), 734 ( 735 Timestamp("2011-01-01", tz="US/Eastern"), 736 DatetimeTZDtype(tz="US/Eastern"), 737 ), 738 ], 739 ) 740 def test_constructor_extension_scalar_data(self, data, dtype): 741 # GH 34832 742 df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) 743 744 assert df["a"].dtype == dtype 745 assert df["b"].dtype == dtype 746 747 arr = pd.array([data] * 2, dtype=dtype) 748 expected = DataFrame({"a": arr, "b": arr}) 749 750 tm.assert_frame_equal(df, expected) 751 752 def test_nested_dict_frame_constructor(self): 753 rng = pd.period_range("1/1/2000", periods=5) 754 df = DataFrame(np.random.randn(10, 5), columns=rng) 755 756 data = {} 757 for col in df.columns: 758 for row in df.index: 759 data.setdefault(col, {})[row] = df._get_value(row, col) 760 761 result = DataFrame(data, columns=rng) 762 tm.assert_frame_equal(result, df) 763 764 data = {} 765 for col in df.columns: 766 for row in df.index: 767 data.setdefault(row, {})[col] = df._get_value(row, col) 768 769 result = DataFrame(data, index=rng).T 770 tm.assert_frame_equal(result, df) 771 772 def _check_basic_constructor(self, empty): 773 # mat: 2d matrix with shape (3, 2) to input. empty - makes sized 774 # objects 775 mat = empty((2, 3), dtype=float) 776 # 2-D input 777 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) 778 779 assert len(frame.index) == 2 780 assert len(frame.columns) == 3 781 782 # 1-D input 783 frame = DataFrame(empty((3,)), columns=["A"], index=[1, 2, 3]) 784 assert len(frame.index) == 3 785 assert len(frame.columns) == 1 786 787 # cast type 788 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64) 789 assert frame.values.dtype == np.int64 790 791 # wrong size axis labels 792 msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" 793 with pytest.raises(ValueError, match=msg): 794 DataFrame(mat, columns=["A", "B", "C"], index=[1]) 795 msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" 796 with pytest.raises(ValueError, match=msg): 797 DataFrame(mat, columns=["A", "B"], index=[1, 2]) 798 799 # higher dim raise exception 800 with pytest.raises(ValueError, match="Must pass 2-d input"): 801 DataFrame(empty((3, 3, 3)), columns=["A", "B", "C"], index=[1]) 802 803 # automatic labeling 804 frame = DataFrame(mat) 805 tm.assert_index_equal(frame.index, Index(range(2)), exact=True) 806 tm.assert_index_equal(frame.columns, Index(range(3)), exact=True) 807 808 frame = DataFrame(mat, index=[1, 2]) 809 tm.assert_index_equal(frame.columns, Index(range(3)), exact=True) 810 811 frame = DataFrame(mat, columns=["A", "B", "C"]) 812 tm.assert_index_equal(frame.index, Index(range(2)), exact=True) 813 814 # 0-length axis 815 frame = DataFrame(empty((0, 3))) 816 assert len(frame.index) == 0 817 818 frame = DataFrame(empty((3, 0))) 819 assert len(frame.columns) == 0 820 821 def test_constructor_ndarray(self): 822 self._check_basic_constructor(np.ones) 823 824 frame = DataFrame(["foo", "bar"], index=[0, 1], columns=["A"]) 825 assert len(frame) == 2 826 827 def test_constructor_maskedarray(self): 828 self._check_basic_constructor(ma.masked_all) 829 830 # Check non-masked values 831 mat = ma.masked_all((2, 3), dtype=float) 832 mat[0, 0] = 1.0 833 mat[1, 2] = 2.0 834 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) 835 assert 1.0 == frame["A"][1] 836 assert 2.0 == frame["C"][2] 837 838 # what is this even checking?? 839 mat = ma.masked_all((2, 3), dtype=float) 840 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) 841 assert np.all(~np.asarray(frame == frame)) 842 843 def test_constructor_maskedarray_nonfloat(self): 844 # masked int promoted to float 845 mat = ma.masked_all((2, 3), dtype=int) 846 # 2-D input 847 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) 848 849 assert len(frame.index) == 2 850 assert len(frame.columns) == 3 851 assert np.all(~np.asarray(frame == frame)) 852 853 # cast type 854 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.float64) 855 assert frame.values.dtype == np.float64 856 857 # Check non-masked values 858 mat2 = ma.copy(mat) 859 mat2[0, 0] = 1 860 mat2[1, 2] = 2 861 frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) 862 assert 1 == frame["A"][1] 863 assert 2 == frame["C"][2] 864 865 # masked np.datetime64 stays (use NaT as null) 866 mat = ma.masked_all((2, 3), dtype="M8[ns]") 867 # 2-D input 868 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) 869 870 assert len(frame.index) == 2 871 assert len(frame.columns) == 3 872 assert isna(frame).values.all() 873 874 # cast type 875 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64) 876 assert frame.values.dtype == np.int64 877 878 # Check non-masked values 879 mat2 = ma.copy(mat) 880 mat2[0, 0] = 1 881 mat2[1, 2] = 2 882 frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) 883 assert 1 == frame["A"].view("i8")[1] 884 assert 2 == frame["C"].view("i8")[2] 885 886 # masked bool promoted to object 887 mat = ma.masked_all((2, 3), dtype=bool) 888 # 2-D input 889 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) 890 891 assert len(frame.index) == 2 892 assert len(frame.columns) == 3 893 assert np.all(~np.asarray(frame == frame)) 894 895 # cast type 896 frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=object) 897 assert frame.values.dtype == object 898 899 # Check non-masked values 900 mat2 = ma.copy(mat) 901 mat2[0, 0] = True 902 mat2[1, 2] = False 903 frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) 904 assert frame["A"][1] is True 905 assert frame["C"][2] is False 906 907 def test_constructor_maskedarray_hardened(self): 908 # Check numpy masked arrays with hard masks -- from GH24574 909 mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask() 910 result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) 911 expected = DataFrame( 912 {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, 913 columns=["A", "B"], 914 index=[1, 2], 915 dtype=float, 916 ) 917 tm.assert_frame_equal(result, expected) 918 # Check case where mask is hard but no data are masked 919 mat_hard = ma.ones((2, 2), dtype=float).harden_mask() 920 result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) 921 expected = DataFrame( 922 {"A": [1.0, 1.0], "B": [1.0, 1.0]}, 923 columns=["A", "B"], 924 index=[1, 2], 925 dtype=float, 926 ) 927 tm.assert_frame_equal(result, expected) 928 929 def test_constructor_maskedrecarray_dtype(self): 930 # Ensure constructor honors dtype 931 data = np.ma.array( 932 np.ma.zeros(5, dtype=[("date", "<f8"), ("price", "<f8")]), mask=[False] * 5 933 ) 934 data = data.view(mrecords.mrecarray) 935 result = DataFrame(data, dtype=int) 936 expected = DataFrame(np.zeros((5, 2), dtype=int), columns=["date", "price"]) 937 tm.assert_frame_equal(result, expected) 938 939 def test_constructor_mrecarray(self): 940 # Ensure mrecarray produces frame identical to dict of masked arrays 941 # from GH3479 942 943 assert_fr_equal = functools.partial( 944 tm.assert_frame_equal, check_index_type=True, check_column_type=True 945 ) 946 arrays = [ 947 ("float", np.array([1.5, 2.0])), 948 ("int", np.array([1, 2])), 949 ("str", np.array(["abc", "def"])), 950 ] 951 for name, arr in arrays[:]: 952 arrays.append( 953 ("masked1_" + name, np.ma.masked_array(arr, mask=[False, True])) 954 ) 955 arrays.append(("masked_all", np.ma.masked_all((2,)))) 956 arrays.append(("masked_none", np.ma.masked_array([1.0, 2.5], mask=False))) 957 958 # call assert_frame_equal for all selections of 3 arrays 959 for comb in itertools.combinations(arrays, 3): 960 names, data = zip(*comb) 961 mrecs = mrecords.fromarrays(data, names=names) 962 963 # fill the comb 964 comb = {k: (v.filled() if hasattr(v, "filled") else v) for k, v in comb} 965 966 expected = DataFrame(comb, columns=names) 967 result = DataFrame(mrecs) 968 assert_fr_equal(result, expected) 969 970 # specify columns 971 expected = DataFrame(comb, columns=names[::-1]) 972 result = DataFrame(mrecs, columns=names[::-1]) 973 assert_fr_equal(result, expected) 974 975 # specify index 976 expected = DataFrame(comb, columns=names, index=[1, 2]) 977 result = DataFrame(mrecs, index=[1, 2]) 978 assert_fr_equal(result, expected) 979 980 def test_constructor_corner_shape(self): 981 df = DataFrame(index=[]) 982 assert df.values.shape == (0, 0) 983 984 @pytest.mark.parametrize( 985 "data, index, columns, dtype, expected", 986 [ 987 (None, list(range(10)), ["a", "b"], object, np.object_), 988 (None, None, ["a", "b"], "int64", np.dtype("int64")), 989 (None, list(range(10)), ["a", "b"], int, np.dtype("float64")), 990 ({}, None, ["foo", "bar"], None, np.object_), 991 ({"b": 1}, list(range(10)), list("abc"), int, np.dtype("float64")), 992 ], 993 ) 994 def test_constructor_dtype(self, data, index, columns, dtype, expected): 995 df = DataFrame(data, index, columns, dtype) 996 assert df.values.dtype == expected 997 998 @pytest.mark.parametrize( 999 "data,input_dtype,expected_dtype", 1000 ( 1001 ([True, False, None], "boolean", pd.BooleanDtype), 1002 ([1.0, 2.0, None], "Float64", pd.Float64Dtype), 1003 ([1, 2, None], "Int64", pd.Int64Dtype), 1004 (["a", "b", "c"], "string", pd.StringDtype), 1005 ), 1006 ) 1007 def test_constructor_dtype_nullable_extension_arrays( 1008 self, data, input_dtype, expected_dtype 1009 ): 1010 df = DataFrame({"a": data}, dtype=input_dtype) 1011 assert df["a"].dtype == expected_dtype() 1012 1013 def test_constructor_scalar_inference(self): 1014 data = {"int": 1, "bool": True, "float": 3.0, "complex": 4j, "object": "foo"} 1015 df = DataFrame(data, index=np.arange(10)) 1016 1017 assert df["int"].dtype == np.int64 1018 assert df["bool"].dtype == np.bool_ 1019 assert df["float"].dtype == np.float64 1020 assert df["complex"].dtype == np.complex128 1021 assert df["object"].dtype == np.object_ 1022 1023 def test_constructor_arrays_and_scalars(self): 1024 df = DataFrame({"a": np.random.randn(10), "b": True}) 1025 exp = DataFrame({"a": df["a"].values, "b": [True] * 10}) 1026 1027 tm.assert_frame_equal(df, exp) 1028 with pytest.raises(ValueError, match="must pass an index"): 1029 DataFrame({"a": False, "b": True}) 1030 1031 def test_constructor_DataFrame(self, float_frame): 1032 df = DataFrame(float_frame) 1033 tm.assert_frame_equal(df, float_frame) 1034 1035 df_casted = DataFrame(float_frame, dtype=np.int64) 1036 assert df_casted.values.dtype == np.int64 1037 1038 def test_constructor_more(self, float_frame): 1039 # used to be in test_matrix.py 1040 arr = np.random.randn(10) 1041 dm = DataFrame(arr, columns=["A"], index=np.arange(10)) 1042 assert dm.values.ndim == 2 1043 1044 arr = np.random.randn(0) 1045 dm = DataFrame(arr) 1046 assert dm.values.ndim == 2 1047 assert dm.values.ndim == 2 1048 1049 # no data specified 1050 dm = DataFrame(columns=["A", "B"], index=np.arange(10)) 1051 assert dm.values.shape == (10, 2) 1052 1053 dm = DataFrame(columns=["A", "B"]) 1054 assert dm.values.shape == (0, 2) 1055 1056 dm = DataFrame(index=np.arange(10)) 1057 assert dm.values.shape == (10, 0) 1058 1059 # can't cast 1060 mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1) 1061 with pytest.raises(ValueError, match="cast"): 1062 DataFrame(mat, index=[0, 1], columns=[0], dtype=float) 1063 1064 dm = DataFrame(DataFrame(float_frame._series)) 1065 tm.assert_frame_equal(dm, float_frame) 1066 1067 # int cast 1068 dm = DataFrame( 1069 {"A": np.ones(10, dtype=int), "B": np.ones(10, dtype=np.float64)}, 1070 index=np.arange(10), 1071 ) 1072 1073 assert len(dm.columns) == 2 1074 assert dm.values.dtype == np.float64 1075 1076 def test_constructor_empty_list(self): 1077 df = DataFrame([], index=[]) 1078 expected = DataFrame(index=[]) 1079 tm.assert_frame_equal(df, expected) 1080 1081 # GH 9939 1082 df = DataFrame([], columns=["A", "B"]) 1083 expected = DataFrame({}, columns=["A", "B"]) 1084 tm.assert_frame_equal(df, expected) 1085 1086 # Empty generator: list(empty_gen()) == [] 1087 def empty_gen(): 1088 return 1089 yield 1090 1091 df = DataFrame(empty_gen(), columns=["A", "B"]) 1092 tm.assert_frame_equal(df, expected) 1093 1094 def test_constructor_list_of_lists(self): 1095 # GH #484 1096 df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"]) 1097 assert is_integer_dtype(df["num"]) 1098 assert df["str"].dtype == np.object_ 1099 1100 # GH 4851 1101 # list of 0-dim ndarrays 1102 expected = DataFrame({0: np.arange(10)}) 1103 data = [np.array(x) for x in range(10)] 1104 result = DataFrame(data) 1105 tm.assert_frame_equal(result, expected) 1106 1107 def test_constructor_list_like_data_nested_list_column(self): 1108 # GH 32173 1109 arrays = [list("abcd"), list("cdef")] 1110 result = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays) 1111 1112 mi = MultiIndex.from_arrays(arrays) 1113 expected = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=mi) 1114 1115 tm.assert_frame_equal(result, expected) 1116 1117 def test_constructor_wrong_length_nested_list_column(self): 1118 # GH 32173 1119 arrays = [list("abc"), list("cde")] 1120 1121 msg = "3 columns passed, passed data had 4" 1122 with pytest.raises(ValueError, match=msg): 1123 DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays) 1124 1125 def test_constructor_unequal_length_nested_list_column(self): 1126 # GH 32173 1127 arrays = [list("abcd"), list("cde")] 1128 1129 msg = "Length of columns passed for MultiIndex columns is different" 1130 with pytest.raises(ValueError, match=msg): 1131 DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays) 1132 1133 def test_constructor_sequence_like(self): 1134 # GH 3783 1135 # collections.Squence like 1136 1137 class DummyContainer(abc.Sequence): 1138 def __init__(self, lst): 1139 self._lst = lst 1140 1141 def __getitem__(self, n): 1142 return self._lst.__getitem__(n) 1143 1144 def __len__(self, n): 1145 return self._lst.__len__() 1146 1147 lst_containers = [DummyContainer([1, "a"]), DummyContainer([2, "b"])] 1148 columns = ["num", "str"] 1149 result = DataFrame(lst_containers, columns=columns) 1150 expected = DataFrame([[1, "a"], [2, "b"]], columns=columns) 1151 tm.assert_frame_equal(result, expected, check_dtype=False) 1152 1153 # GH 4297 1154 # support Array 1155 import array 1156 1157 result = DataFrame({"A": array.array("i", range(10))}) 1158 expected = DataFrame({"A": list(range(10))}) 1159 tm.assert_frame_equal(result, expected, check_dtype=False) 1160 1161 expected = DataFrame([list(range(10)), list(range(10))]) 1162 result = DataFrame([array.array("i", range(10)), array.array("i", range(10))]) 1163 tm.assert_frame_equal(result, expected, check_dtype=False) 1164 1165 def test_constructor_range(self): 1166 # GH26342 1167 result = DataFrame(range(10)) 1168 expected = DataFrame(list(range(10))) 1169 tm.assert_frame_equal(result, expected) 1170 1171 def test_constructor_list_of_ranges(self): 1172 result = DataFrame([range(10), range(10)]) 1173 expected = DataFrame([list(range(10)), list(range(10))]) 1174 tm.assert_frame_equal(result, expected) 1175 1176 def test_constructor_iterable(self): 1177 # GH 21987 1178 class Iter: 1179 def __iter__(self): 1180 for i in range(10): 1181 yield [1, 2, 3] 1182 1183 expected = DataFrame([[1, 2, 3]] * 10) 1184 result = DataFrame(Iter()) 1185 tm.assert_frame_equal(result, expected) 1186 1187 def test_constructor_iterator(self): 1188 result = DataFrame(iter(range(10))) 1189 expected = DataFrame(list(range(10))) 1190 tm.assert_frame_equal(result, expected) 1191 1192 def test_constructor_list_of_iterators(self): 1193 result = DataFrame([iter(range(10)), iter(range(10))]) 1194 expected = DataFrame([list(range(10)), list(range(10))]) 1195 tm.assert_frame_equal(result, expected) 1196 1197 def test_constructor_generator(self): 1198 # related #2305 1199 1200 gen1 = (i for i in range(10)) 1201 gen2 = (i for i in range(10)) 1202 1203 expected = DataFrame([list(range(10)), list(range(10))]) 1204 result = DataFrame([gen1, gen2]) 1205 tm.assert_frame_equal(result, expected) 1206 1207 gen = ([i, "a"] for i in range(10)) 1208 result = DataFrame(gen) 1209 expected = DataFrame({0: range(10), 1: "a"}) 1210 tm.assert_frame_equal(result, expected, check_dtype=False) 1211 1212 def test_constructor_list_of_odicts(self): 1213 data = [ 1214 OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), 1215 OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), 1216 OrderedDict([["a", 1.5], ["d", 6]]), 1217 OrderedDict(), 1218 OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), 1219 OrderedDict([["b", 3], ["c", 4], ["d", 6]]), 1220 ] 1221 1222 result = DataFrame(data) 1223 expected = DataFrame.from_dict( 1224 dict(zip(range(len(data)), data)), orient="index" 1225 ) 1226 tm.assert_frame_equal(result, expected.reindex(result.index)) 1227 1228 result = DataFrame([{}]) 1229 expected = DataFrame(index=[0]) 1230 tm.assert_frame_equal(result, expected) 1231 1232 def test_constructor_single_row(self): 1233 data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])] 1234 1235 result = DataFrame(data) 1236 expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex( 1237 result.index 1238 ) 1239 tm.assert_frame_equal(result, expected) 1240 1241 @pytest.mark.parametrize("dict_type", [dict, OrderedDict]) 1242 def test_constructor_ordered_dict_preserve_order(self, dict_type): 1243 # see gh-13304 1244 expected = DataFrame([[2, 1]], columns=["b", "a"]) 1245 1246 data = dict_type() 1247 data["b"] = [2] 1248 data["a"] = [1] 1249 1250 result = DataFrame(data) 1251 tm.assert_frame_equal(result, expected) 1252 1253 data = dict_type() 1254 data["b"] = 2 1255 data["a"] = 1 1256 1257 result = DataFrame([data]) 1258 tm.assert_frame_equal(result, expected) 1259 1260 @pytest.mark.parametrize("dict_type", [dict, OrderedDict]) 1261 def test_constructor_ordered_dict_conflicting_orders(self, dict_type): 1262 # the first dict element sets the ordering for the DataFrame, 1263 # even if there are conflicting orders from subsequent ones 1264 row_one = dict_type() 1265 row_one["b"] = 2 1266 row_one["a"] = 1 1267 1268 row_two = dict_type() 1269 row_two["a"] = 1 1270 row_two["b"] = 2 1271 1272 row_three = {"b": 2, "a": 1} 1273 1274 expected = DataFrame([[2, 1], [2, 1]], columns=["b", "a"]) 1275 result = DataFrame([row_one, row_two]) 1276 tm.assert_frame_equal(result, expected) 1277 1278 expected = DataFrame([[2, 1], [2, 1], [2, 1]], columns=["b", "a"]) 1279 result = DataFrame([row_one, row_two, row_three]) 1280 tm.assert_frame_equal(result, expected) 1281 1282 def test_constructor_list_of_series(self): 1283 data = [ 1284 OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), 1285 OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]), 1286 ] 1287 sdict = OrderedDict(zip(["x", "y"], data)) 1288 idx = Index(["a", "b", "c"]) 1289 1290 # all named 1291 data2 = [ 1292 Series([1.5, 3, 4], idx, dtype="O", name="x"), 1293 Series([1.5, 3, 6], idx, name="y"), 1294 ] 1295 result = DataFrame(data2) 1296 expected = DataFrame.from_dict(sdict, orient="index") 1297 tm.assert_frame_equal(result, expected) 1298 1299 # some unnamed 1300 data2 = [ 1301 Series([1.5, 3, 4], idx, dtype="O", name="x"), 1302 Series([1.5, 3, 6], idx), 1303 ] 1304 result = DataFrame(data2) 1305 1306 sdict = OrderedDict(zip(["x", "Unnamed 0"], data)) 1307 expected = DataFrame.from_dict(sdict, orient="index") 1308 tm.assert_frame_equal(result, expected) 1309 1310 # none named 1311 data = [ 1312 OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]), 1313 OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]), 1314 OrderedDict([["a", 1.5], ["d", 6]]), 1315 OrderedDict(), 1316 OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), 1317 OrderedDict([["b", 3], ["c", 4], ["d", 6]]), 1318 ] 1319 data = [ 1320 create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data 1321 ] 1322 1323 result = DataFrame(data) 1324 sdict = OrderedDict(zip(range(len(data)), data)) 1325 expected = DataFrame.from_dict(sdict, orient="index") 1326 tm.assert_frame_equal(result, expected.reindex(result.index)) 1327 1328 result2 = DataFrame(data, index=np.arange(6)) 1329 tm.assert_frame_equal(result, result2) 1330 1331 result = DataFrame([Series(dtype=object)]) 1332 expected = DataFrame(index=[0]) 1333 tm.assert_frame_equal(result, expected) 1334 1335 data = [ 1336 OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), 1337 OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]), 1338 ] 1339 sdict = OrderedDict(zip(range(len(data)), data)) 1340 1341 idx = Index(["a", "b", "c"]) 1342 data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)] 1343 result = DataFrame(data2) 1344 expected = DataFrame.from_dict(sdict, orient="index") 1345 tm.assert_frame_equal(result, expected) 1346 1347 def test_constructor_list_of_series_aligned_index(self): 1348 series = [Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3)] 1349 result = DataFrame(series) 1350 expected = DataFrame( 1351 {"b": [0, 1, 2], "a": [0, 1, 2], "c": [0, 1, 2]}, 1352 columns=["b", "a", "c"], 1353 index=["0", "1", "2"], 1354 ) 1355 tm.assert_frame_equal(result, expected) 1356 1357 def test_constructor_list_of_derived_dicts(self): 1358 class CustomDict(dict): 1359 pass 1360 1361 d = {"a": 1.5, "b": 3} 1362 1363 data_custom = [CustomDict(d)] 1364 data = [d] 1365 1366 result_custom = DataFrame(data_custom) 1367 result = DataFrame(data) 1368 tm.assert_frame_equal(result, result_custom) 1369 1370 def test_constructor_ragged(self): 1371 data = {"A": np.random.randn(10), "B": np.random.randn(8)} 1372 with pytest.raises(ValueError, match="arrays must all be same length"): 1373 DataFrame(data) 1374 1375 def test_constructor_scalar(self): 1376 idx = Index(range(3)) 1377 df = DataFrame({"a": 0}, index=idx) 1378 expected = DataFrame({"a": [0, 0, 0]}, index=idx) 1379 tm.assert_frame_equal(df, expected, check_dtype=False) 1380 1381 def test_constructor_Series_copy_bug(self, float_frame): 1382 df = DataFrame(float_frame["A"], index=float_frame.index, columns=["A"]) 1383 df.copy() 1384 1385 def test_constructor_mixed_dict_and_Series(self): 1386 data = {} 1387 data["A"] = {"foo": 1, "bar": 2, "baz": 3} 1388 data["B"] = Series([4, 3, 2, 1], index=["bar", "qux", "baz", "foo"]) 1389 1390 result = DataFrame(data) 1391 assert result.index.is_monotonic 1392 1393 # ordering ambiguous, raise exception 1394 with pytest.raises(ValueError, match="ambiguous ordering"): 1395 DataFrame({"A": ["a", "b"], "B": {"a": "a", "b": "b"}}) 1396 1397 # this is OK though 1398 result = DataFrame({"A": ["a", "b"], "B": Series(["a", "b"], index=["a", "b"])}) 1399 expected = DataFrame({"A": ["a", "b"], "B": ["a", "b"]}, index=["a", "b"]) 1400 tm.assert_frame_equal(result, expected) 1401 1402 def test_constructor_mixed_type_rows(self): 1403 # Issue 25075 1404 data = [[1, 2], (3, 4)] 1405 result = DataFrame(data) 1406 expected = DataFrame([[1, 2], [3, 4]]) 1407 tm.assert_frame_equal(result, expected) 1408 1409 @pytest.mark.parametrize( 1410 "tuples,lists", 1411 [ 1412 ((), []), 1413 ((()), []), 1414 (((), ()), [(), ()]), 1415 (((), ()), [[], []]), 1416 (([], []), [[], []]), 1417 (([1], [2]), [[1], [2]]), # GH 32776 1418 (([1, 2, 3], [4, 5, 6]), [[1, 2, 3], [4, 5, 6]]), 1419 ], 1420 ) 1421 def test_constructor_tuple(self, tuples, lists): 1422 # GH 25691 1423 result = DataFrame(tuples) 1424 expected = DataFrame(lists) 1425 tm.assert_frame_equal(result, expected) 1426 1427 def test_constructor_list_of_tuples(self): 1428 result = DataFrame({"A": [(1, 2), (3, 4)]}) 1429 expected = DataFrame({"A": Series([(1, 2), (3, 4)])}) 1430 tm.assert_frame_equal(result, expected) 1431 1432 def test_constructor_list_of_namedtuples(self): 1433 # GH11181 1434 from collections import namedtuple 1435 1436 named_tuple = namedtuple("Pandas", list("ab")) 1437 tuples = [named_tuple(1, 3), named_tuple(2, 4)] 1438 expected = DataFrame({"a": [1, 2], "b": [3, 4]}) 1439 result = DataFrame(tuples) 1440 tm.assert_frame_equal(result, expected) 1441 1442 # with columns 1443 expected = DataFrame({"y": [1, 2], "z": [3, 4]}) 1444 result = DataFrame(tuples, columns=["y", "z"]) 1445 tm.assert_frame_equal(result, expected) 1446 1447 def test_constructor_list_of_dataclasses(self): 1448 # GH21910 1449 from dataclasses import make_dataclass 1450 1451 Point = make_dataclass("Point", [("x", int), ("y", int)]) 1452 1453 datas = [Point(0, 3), Point(1, 3)] 1454 expected = DataFrame({"x": [0, 1], "y": [3, 3]}) 1455 result = DataFrame(datas) 1456 tm.assert_frame_equal(result, expected) 1457 1458 def test_constructor_list_of_dataclasses_with_varying_types(self): 1459 # GH21910 1460 from dataclasses import make_dataclass 1461 1462 # varying types 1463 Point = make_dataclass("Point", [("x", int), ("y", int)]) 1464 HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)]) 1465 1466 datas = [Point(0, 3), HLine(1, 3, 3)] 1467 1468 expected = DataFrame( 1469 {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]} 1470 ) 1471 result = DataFrame(datas) 1472 tm.assert_frame_equal(result, expected) 1473 1474 def test_constructor_list_of_dataclasses_error_thrown(self): 1475 # GH21910 1476 from dataclasses import make_dataclass 1477 1478 Point = make_dataclass("Point", [("x", int), ("y", int)]) 1479 1480 # expect TypeError 1481 msg = "asdict() should be called on dataclass instances" 1482 with pytest.raises(TypeError, match=re.escape(msg)): 1483 DataFrame([Point(0, 0), {"x": 1, "y": 0}]) 1484 1485 def test_constructor_list_of_dict_order(self): 1486 # GH10056 1487 data = [ 1488 {"First": 1, "Second": 4, "Third": 7, "Fourth": 10}, 1489 {"Second": 5, "First": 2, "Fourth": 11, "Third": 8}, 1490 {"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13}, 1491 ] 1492 expected = DataFrame( 1493 { 1494 "First": [1, 2, 3], 1495 "Second": [4, 5, 6], 1496 "Third": [7, 8, 9], 1497 "Fourth": [10, 11, 12], 1498 "YYY": [None, None, 14], 1499 "XXX": [None, None, 13], 1500 } 1501 ) 1502 result = DataFrame(data) 1503 tm.assert_frame_equal(result, expected) 1504 1505 def test_constructor_orient(self, float_string_frame): 1506 data_dict = float_string_frame.T._series 1507 recons = DataFrame.from_dict(data_dict, orient="index") 1508 expected = float_string_frame.reindex(index=recons.index) 1509 tm.assert_frame_equal(recons, expected) 1510 1511 # dict of sequence 1512 a = {"hi": [32, 3, 3], "there": [3, 5, 3]} 1513 rs = DataFrame.from_dict(a, orient="index") 1514 xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) 1515 tm.assert_frame_equal(rs, xp) 1516 1517 def test_constructor_from_ordered_dict(self): 1518 # GH8425 1519 a = OrderedDict( 1520 [ 1521 ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), 1522 ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])), 1523 ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), 1524 ] 1525 ) 1526 expected = DataFrame.from_dict(a, orient="columns").T 1527 result = DataFrame.from_dict(a, orient="index") 1528 tm.assert_frame_equal(result, expected) 1529 1530 def test_from_dict_columns_parameter(self): 1531 # GH 18529 1532 # Test new columns parameter for from_dict that was added to make 1533 # from_items(..., orient='index', columns=[...]) easier to replicate 1534 result = DataFrame.from_dict( 1535 OrderedDict([("A", [1, 2]), ("B", [4, 5])]), 1536 orient="index", 1537 columns=["one", "two"], 1538 ) 1539 expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"]) 1540 tm.assert_frame_equal(result, expected) 1541 1542 msg = "cannot use columns parameter with orient='columns'" 1543 with pytest.raises(ValueError, match=msg): 1544 DataFrame.from_dict( 1545 {"A": [1, 2], "B": [4, 5]}, 1546 orient="columns", 1547 columns=["one", "two"], 1548 ) 1549 with pytest.raises(ValueError, match=msg): 1550 DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"]) 1551 1552 @pytest.mark.parametrize( 1553 "data_dict, keys, orient", 1554 [ 1555 ({}, [], "index"), 1556 ([{("a",): 1}, {("a",): 2}], [("a",)], "columns"), 1557 ([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"), 1558 ([{("a", "b"): 1}], [("a", "b")], "columns"), 1559 ], 1560 ) 1561 def test_constructor_from_dict_tuples(self, data_dict, keys, orient): 1562 # GH 16769 1563 df = DataFrame.from_dict(data_dict, orient) 1564 1565 result = df.columns 1566 expected = Index(keys, dtype="object", tupleize_cols=False) 1567 1568 tm.assert_index_equal(result, expected) 1569 1570 def test_frame_dict_constructor_empty_series(self): 1571 s1 = Series( 1572 [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) 1573 ) 1574 s2 = Series( 1575 [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) 1576 ) 1577 s3 = Series(dtype=object) 1578 1579 # it works! 1580 DataFrame({"foo": s1, "bar": s2, "baz": s3}) 1581 DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) 1582 1583 def test_constructor_Series_named(self): 1584 a = Series([1, 2, 3], index=["a", "b", "c"], name="x") 1585 df = DataFrame(a) 1586 assert df.columns[0] == "x" 1587 tm.assert_index_equal(df.index, a.index) 1588 1589 # ndarray like 1590 arr = np.random.randn(10) 1591 s = Series(arr, name="x") 1592 df = DataFrame(s) 1593 expected = DataFrame({"x": s}) 1594 tm.assert_frame_equal(df, expected) 1595 1596 s = Series(arr, index=range(3, 13)) 1597 df = DataFrame(s) 1598 expected = DataFrame({0: s}) 1599 tm.assert_frame_equal(df, expected) 1600 1601 msg = r"Shape of passed values is \(10, 1\), indices imply \(10, 2\)" 1602 with pytest.raises(ValueError, match=msg): 1603 DataFrame(s, columns=[1, 2]) 1604 1605 # #2234 1606 a = Series([], name="x", dtype=object) 1607 df = DataFrame(a) 1608 assert df.columns[0] == "x" 1609 1610 # series with name and w/o 1611 s1 = Series(arr, name="x") 1612 df = DataFrame([s1, arr]).T 1613 expected = DataFrame({"x": s1, "Unnamed 0": arr}, columns=["x", "Unnamed 0"]) 1614 tm.assert_frame_equal(df, expected) 1615 1616 # this is a bit non-intuitive here; the series collapse down to arrays 1617 df = DataFrame([arr, s1]).T 1618 expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) 1619 tm.assert_frame_equal(df, expected) 1620 1621 def test_constructor_Series_named_and_columns(self): 1622 # GH 9232 validation 1623 1624 s0 = Series(range(5), name=0) 1625 s1 = Series(range(5), name=1) 1626 1627 # matching name and column gives standard frame 1628 tm.assert_frame_equal(DataFrame(s0, columns=[0]), s0.to_frame()) 1629 tm.assert_frame_equal(DataFrame(s1, columns=[1]), s1.to_frame()) 1630 1631 # non-matching produces empty frame 1632 assert DataFrame(s0, columns=[1]).empty 1633 assert DataFrame(s1, columns=[0]).empty 1634 1635 def test_constructor_Series_differently_indexed(self): 1636 # name 1637 s1 = Series([1, 2, 3], index=["a", "b", "c"], name="x") 1638 1639 # no name 1640 s2 = Series([1, 2, 3], index=["a", "b", "c"]) 1641 1642 other_index = Index(["a", "b"]) 1643 1644 df1 = DataFrame(s1, index=other_index) 1645 exp1 = DataFrame(s1.reindex(other_index)) 1646 assert df1.columns[0] == "x" 1647 tm.assert_frame_equal(df1, exp1) 1648 1649 df2 = DataFrame(s2, index=other_index) 1650 exp2 = DataFrame(s2.reindex(other_index)) 1651 assert df2.columns[0] == 0 1652 tm.assert_index_equal(df2.index, other_index) 1653 tm.assert_frame_equal(df2, exp2) 1654 1655 @pytest.mark.parametrize( 1656 "name_in1,name_in2,name_in3,name_out", 1657 [ 1658 ("idx", "idx", "idx", "idx"), 1659 ("idx", "idx", None, None), 1660 ("idx", None, None, None), 1661 ("idx1", "idx2", None, None), 1662 ("idx1", "idx1", "idx2", None), 1663 ("idx1", "idx2", "idx3", None), 1664 (None, None, None, None), 1665 ], 1666 ) 1667 def test_constructor_index_names(self, name_in1, name_in2, name_in3, name_out): 1668 # GH13475 1669 indices = [ 1670 Index(["a", "b", "c"], name=name_in1), 1671 Index(["b", "c", "d"], name=name_in2), 1672 Index(["c", "d", "e"], name=name_in3), 1673 ] 1674 series = { 1675 c: Series([0, 1, 2], index=i) for i, c in zip(indices, ["x", "y", "z"]) 1676 } 1677 result = DataFrame(series) 1678 1679 exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out) 1680 expected = DataFrame( 1681 { 1682 "x": [0, 1, 2, np.nan, np.nan], 1683 "y": [np.nan, 0, 1, 2, np.nan], 1684 "z": [np.nan, np.nan, 0, 1, 2], 1685 }, 1686 index=exp_ind, 1687 ) 1688 1689 tm.assert_frame_equal(result, expected) 1690 1691 def test_constructor_manager_resize(self, float_frame): 1692 index = list(float_frame.index[:5]) 1693 columns = list(float_frame.columns[:3]) 1694 1695 result = DataFrame(float_frame._mgr, index=index, columns=columns) 1696 tm.assert_index_equal(result.index, Index(index)) 1697 tm.assert_index_equal(result.columns, Index(columns)) 1698 1699 def test_constructor_mix_series_nonseries(self, float_frame): 1700 df = DataFrame( 1701 {"A": float_frame["A"], "B": list(float_frame["B"])}, columns=["A", "B"] 1702 ) 1703 tm.assert_frame_equal(df, float_frame.loc[:, ["A", "B"]]) 1704 1705 msg = "does not match index length" 1706 with pytest.raises(ValueError, match=msg): 1707 DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]}) 1708 1709 def test_constructor_miscast_na_int_dtype(self): 1710 df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) 1711 expected = DataFrame([[np.nan, 1], [1, 0]]) 1712 tm.assert_frame_equal(df, expected) 1713 1714 def test_constructor_column_duplicates(self): 1715 # it works! #2079 1716 df = DataFrame([[8, 5]], columns=["a", "a"]) 1717 edf = DataFrame([[8, 5]]) 1718 edf.columns = ["a", "a"] 1719 1720 tm.assert_frame_equal(df, edf) 1721 1722 idf = DataFrame.from_records([(8, 5)], columns=["a", "a"]) 1723 1724 tm.assert_frame_equal(idf, edf) 1725 1726 msg = "If using all scalar values, you must pass an index" 1727 with pytest.raises(ValueError, match=msg): 1728 DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)])) 1729 1730 def test_constructor_empty_with_string_dtype(self): 1731 # GH 9428 1732 expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object) 1733 1734 df = DataFrame(index=[0, 1], columns=[0, 1], dtype=str) 1735 tm.assert_frame_equal(df, expected) 1736 df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.str_) 1737 tm.assert_frame_equal(df, expected) 1738 df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.unicode_) 1739 tm.assert_frame_equal(df, expected) 1740 df = DataFrame(index=[0, 1], columns=[0, 1], dtype="U5") 1741 tm.assert_frame_equal(df, expected) 1742 1743 def test_constructor_empty_with_string_extension(self): 1744 # GH 34915 1745 expected = DataFrame(index=[], columns=["c1"], dtype="string") 1746 df = DataFrame(columns=["c1"], dtype="string") 1747 tm.assert_frame_equal(df, expected) 1748 1749 def test_constructor_single_value(self): 1750 # expecting single value upcasting here 1751 df = DataFrame(0.0, index=[1, 2, 3], columns=["a", "b", "c"]) 1752 tm.assert_frame_equal( 1753 df, DataFrame(np.zeros(df.shape).astype("float64"), df.index, df.columns) 1754 ) 1755 1756 df = DataFrame(0, index=[1, 2, 3], columns=["a", "b", "c"]) 1757 tm.assert_frame_equal( 1758 df, DataFrame(np.zeros(df.shape).astype("int64"), df.index, df.columns) 1759 ) 1760 1761 df = DataFrame("a", index=[1, 2], columns=["a", "c"]) 1762 tm.assert_frame_equal( 1763 df, 1764 DataFrame( 1765 np.array([["a", "a"], ["a", "a"]], dtype=object), 1766 index=[1, 2], 1767 columns=["a", "c"], 1768 ), 1769 ) 1770 1771 msg = "DataFrame constructor not properly called!" 1772 with pytest.raises(ValueError, match=msg): 1773 DataFrame("a", [1, 2]) 1774 with pytest.raises(ValueError, match=msg): 1775 DataFrame("a", columns=["a", "c"]) 1776 1777 msg = "incompatible data and dtype" 1778 with pytest.raises(TypeError, match=msg): 1779 DataFrame("a", [1, 2], ["a", "c"], float) 1780 1781 def test_constructor_with_datetimes(self): 1782 intname = np.dtype(np.int_).name 1783 floatname = np.dtype(np.float_).name 1784 datetime64name = np.dtype("M8[ns]").name 1785 objectname = np.dtype(np.object_).name 1786 1787 # single item 1788 df = DataFrame( 1789 { 1790 "A": 1, 1791 "B": "foo", 1792 "C": "bar", 1793 "D": Timestamp("20010101"), 1794 "E": datetime(2001, 1, 2, 0, 0), 1795 }, 1796 index=np.arange(10), 1797 ) 1798 result = df.dtypes 1799 expected = Series( 1800 [np.dtype("int64")] 1801 + [np.dtype(objectname)] * 2 1802 + [np.dtype(datetime64name)] * 2, 1803 index=list("ABCDE"), 1804 ) 1805 tm.assert_series_equal(result, expected) 1806 1807 # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0 1808 # ndarray with a dtype specified) 1809 df = DataFrame( 1810 { 1811 "a": 1.0, 1812 "b": 2, 1813 "c": "foo", 1814 floatname: np.array(1.0, dtype=floatname), 1815 intname: np.array(1, dtype=intname), 1816 }, 1817 index=np.arange(10), 1818 ) 1819 result = df.dtypes 1820 expected = Series( 1821 [np.dtype("float64")] 1822 + [np.dtype("int64")] 1823 + [np.dtype("object")] 1824 + [np.dtype("float64")] 1825 + [np.dtype(intname)], 1826 index=["a", "b", "c", floatname, intname], 1827 ) 1828 tm.assert_series_equal(result, expected) 1829 1830 # check with ndarray construction ndim>0 1831 df = DataFrame( 1832 { 1833 "a": 1.0, 1834 "b": 2, 1835 "c": "foo", 1836 floatname: np.array([1.0] * 10, dtype=floatname), 1837 intname: np.array([1] * 10, dtype=intname), 1838 }, 1839 index=np.arange(10), 1840 ) 1841 result = df.dtypes 1842 expected = Series( 1843 [np.dtype("float64")] 1844 + [np.dtype("int64")] 1845 + [np.dtype("object")] 1846 + [np.dtype("float64")] 1847 + [np.dtype(intname)], 1848 index=["a", "b", "c", floatname, intname], 1849 ) 1850 tm.assert_series_equal(result, expected) 1851 1852 # GH 2809 1853 ind = date_range(start="2000-01-01", freq="D", periods=10) 1854 datetimes = [ts.to_pydatetime() for ts in ind] 1855 datetime_s = Series(datetimes) 1856 assert datetime_s.dtype == "M8[ns]" 1857 1858 # GH 2810 1859 ind = date_range(start="2000-01-01", freq="D", periods=10) 1860 datetimes = [ts.to_pydatetime() for ts in ind] 1861 dates = [ts.date() for ts in ind] 1862 df = DataFrame(datetimes, columns=["datetimes"]) 1863 df["dates"] = dates 1864 result = df.dtypes 1865 expected = Series( 1866 [np.dtype("datetime64[ns]"), np.dtype("object")], 1867 index=["datetimes", "dates"], 1868 ) 1869 tm.assert_series_equal(result, expected) 1870 1871 # GH 7594 1872 # don't coerce tz-aware 1873 import pytz 1874 1875 tz = pytz.timezone("US/Eastern") 1876 dt = tz.localize(datetime(2012, 1, 1)) 1877 1878 df = DataFrame({"End Date": dt}, index=[0]) 1879 assert df.iat[0, 0] == dt 1880 tm.assert_series_equal( 1881 df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) 1882 ) 1883 1884 df = DataFrame([{"End Date": dt}]) 1885 assert df.iat[0, 0] == dt 1886 tm.assert_series_equal( 1887 df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) 1888 ) 1889 1890 # tz-aware (UTC and other tz's) 1891 # GH 8411 1892 dr = date_range("20130101", periods=3) 1893 df = DataFrame({"value": dr}) 1894 assert df.iat[0, 0].tz is None 1895 dr = date_range("20130101", periods=3, tz="UTC") 1896 df = DataFrame({"value": dr}) 1897 assert str(df.iat[0, 0].tz) == "UTC" 1898 dr = date_range("20130101", periods=3, tz="US/Eastern") 1899 df = DataFrame({"value": dr}) 1900 assert str(df.iat[0, 0].tz) == "US/Eastern" 1901 1902 # GH 7822 1903 # preserver an index with a tz on dict construction 1904 i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") 1905 1906 expected = DataFrame({"a": i.to_series().reset_index(drop=True)}) 1907 df = DataFrame() 1908 df["a"] = i 1909 tm.assert_frame_equal(df, expected) 1910 1911 df = DataFrame({"a": i}) 1912 tm.assert_frame_equal(df, expected) 1913 1914 # multiples 1915 i_no_tz = date_range("1/1/2011", periods=5, freq="10s") 1916 df = DataFrame({"a": i, "b": i_no_tz}) 1917 expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz}) 1918 tm.assert_frame_equal(df, expected) 1919 1920 @pytest.mark.parametrize( 1921 "arr", 1922 [ 1923 np.array([None, None, None, None, datetime.now(), None]), 1924 np.array([None, None, datetime.now(), None]), 1925 [[np.datetime64("NaT")], [None]], 1926 [[np.datetime64("NaT")], [pd.NaT]], 1927 [[None], [np.datetime64("NaT")]], 1928 [[None], [pd.NaT]], 1929 [[pd.NaT], [np.datetime64("NaT")]], 1930 [[pd.NaT], [None]], 1931 ], 1932 ) 1933 def test_constructor_datetimes_with_nulls(self, arr): 1934 # gh-15869, GH#11220 1935 result = DataFrame(arr).dtypes 1936 expected = Series([np.dtype("datetime64[ns]")]) 1937 tm.assert_series_equal(result, expected) 1938 1939 @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) 1940 @pytest.mark.parametrize( 1941 "dtype", 1942 [ 1943 "datetime64[M]", 1944 "datetime64[D]", 1945 "datetime64[h]", 1946 "datetime64[m]", 1947 "datetime64[s]", 1948 "datetime64[ms]", 1949 "datetime64[us]", 1950 "datetime64[ns]", 1951 ], 1952 ) 1953 def test_constructor_datetimes_non_ns(self, order, dtype): 1954 na = np.array( 1955 [ 1956 ["2015-01-01", "2015-01-02", "2015-01-03"], 1957 ["2017-01-01", "2017-01-02", "2017-02-03"], 1958 ], 1959 dtype=dtype, 1960 order=order, 1961 ) 1962 df = DataFrame(na) 1963 expected = DataFrame( 1964 [ 1965 ["2015-01-01", "2015-01-02", "2015-01-03"], 1966 ["2017-01-01", "2017-01-02", "2017-02-03"], 1967 ] 1968 ) 1969 expected = expected.astype(dtype=dtype) 1970 tm.assert_frame_equal(df, expected) 1971 1972 @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) 1973 @pytest.mark.parametrize( 1974 "dtype", 1975 [ 1976 "timedelta64[D]", 1977 "timedelta64[h]", 1978 "timedelta64[m]", 1979 "timedelta64[s]", 1980 "timedelta64[ms]", 1981 "timedelta64[us]", 1982 "timedelta64[ns]", 1983 ], 1984 ) 1985 def test_constructor_timedelta_non_ns(self, order, dtype): 1986 na = np.array( 1987 [ 1988 [np.timedelta64(1, "D"), np.timedelta64(2, "D")], 1989 [np.timedelta64(4, "D"), np.timedelta64(5, "D")], 1990 ], 1991 dtype=dtype, 1992 order=order, 1993 ) 1994 df = DataFrame(na).astype("timedelta64[ns]") 1995 expected = DataFrame( 1996 [ 1997 [Timedelta(1, "D"), Timedelta(2, "D")], 1998 [Timedelta(4, "D"), Timedelta(5, "D")], 1999 ], 2000 ) 2001 tm.assert_frame_equal(df, expected) 2002 2003 def test_constructor_for_list_with_dtypes(self): 2004 # test list of lists/ndarrays 2005 df = DataFrame([np.arange(5) for x in range(5)]) 2006 result = df.dtypes 2007 expected = Series([np.dtype("int64")] * 5) 2008 tm.assert_series_equal(result, expected) 2009 2010 df = DataFrame([np.array(np.arange(5), dtype="int32") for x in range(5)]) 2011 result = df.dtypes 2012 expected = Series([np.dtype("int64")] * 5) 2013 tm.assert_series_equal(result, expected) 2014 2015 # overflow issue? (we always expecte int64 upcasting here) 2016 df = DataFrame({"a": [2 ** 31, 2 ** 31 + 1]}) 2017 assert df.dtypes.iloc[0] == np.dtype("int64") 2018 2019 # GH #2751 (construction with no index specified), make sure we cast to 2020 # platform values 2021 df = DataFrame([1, 2]) 2022 assert df.dtypes.iloc[0] == np.dtype("int64") 2023 2024 df = DataFrame([1.0, 2.0]) 2025 assert df.dtypes.iloc[0] == np.dtype("float64") 2026 2027 df = DataFrame({"a": [1, 2]}) 2028 assert df.dtypes.iloc[0] == np.dtype("int64") 2029 2030 df = DataFrame({"a": [1.0, 2.0]}) 2031 assert df.dtypes.iloc[0] == np.dtype("float64") 2032 2033 df = DataFrame({"a": 1}, index=range(3)) 2034 assert df.dtypes.iloc[0] == np.dtype("int64") 2035 2036 df = DataFrame({"a": 1.0}, index=range(3)) 2037 assert df.dtypes.iloc[0] == np.dtype("float64") 2038 2039 # with object list 2040 df = DataFrame( 2041 { 2042 "a": [1, 2, 4, 7], 2043 "b": [1.2, 2.3, 5.1, 6.3], 2044 "c": list("abcd"), 2045 "d": [datetime(2000, 1, 1) for i in range(4)], 2046 "e": [1.0, 2, 4.0, 7], 2047 } 2048 ) 2049 result = df.dtypes 2050 expected = Series( 2051 [ 2052 np.dtype("int64"), 2053 np.dtype("float64"), 2054 np.dtype("object"), 2055 np.dtype("datetime64[ns]"), 2056 np.dtype("float64"), 2057 ], 2058 index=list("abcde"), 2059 ) 2060 tm.assert_series_equal(result, expected) 2061 2062 def test_constructor_frame_copy(self, float_frame): 2063 cop = DataFrame(float_frame, copy=True) 2064 cop["A"] = 5 2065 assert (cop["A"] == 5).all() 2066 assert not (float_frame["A"] == 5).all() 2067 2068 def test_constructor_ndarray_copy(self, float_frame): 2069 df = DataFrame(float_frame.values) 2070 2071 float_frame.values[5] = 5 2072 assert (df.values[5] == 5).all() 2073 2074 df = DataFrame(float_frame.values, copy=True) 2075 float_frame.values[6] = 6 2076 assert not (df.values[6] == 6).all() 2077 2078 def test_constructor_series_copy(self, float_frame): 2079 series = float_frame._series 2080 2081 df = DataFrame({"A": series["A"]}) 2082 df["A"][:] = 5 2083 2084 assert not (series["A"] == 5).all() 2085 2086 def test_constructor_with_nas(self): 2087 # GH 5016 2088 # na's in indices 2089 2090 def check(df): 2091 for i in range(len(df.columns)): 2092 df.iloc[:, i] 2093 2094 indexer = np.arange(len(df.columns))[isna(df.columns)] 2095 2096 # No NaN found -> error 2097 if len(indexer) == 0: 2098 with pytest.raises(KeyError, match="^nan$"): 2099 df.loc[:, np.nan] 2100 # single nan should result in Series 2101 elif len(indexer) == 1: 2102 tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan]) 2103 # multiple nans should result in DataFrame 2104 else: 2105 tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan]) 2106 2107 df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]) 2108 check(df) 2109 2110 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan]) 2111 check(df) 2112 2113 df = DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]) 2114 check(df) 2115 2116 df = DataFrame( 2117 [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan] 2118 ) 2119 check(df) 2120 2121 # GH 21428 (non-unique columns) 2122 df = DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2]) 2123 check(df) 2124 2125 def test_constructor_lists_to_object_dtype(self): 2126 # from #1074 2127 d = DataFrame({"a": [np.nan, False]}) 2128 assert d["a"].dtype == np.object_ 2129 assert not d["a"][1] 2130 2131 def test_constructor_categorical(self): 2132 2133 # GH8626 2134 2135 # dict creation 2136 df = DataFrame({"A": list("abc")}, dtype="category") 2137 expected = Series(list("abc"), dtype="category", name="A") 2138 tm.assert_series_equal(df["A"], expected) 2139 2140 # to_frame 2141 s = Series(list("abc"), dtype="category") 2142 result = s.to_frame() 2143 expected = Series(list("abc"), dtype="category", name=0) 2144 tm.assert_series_equal(result[0], expected) 2145 result = s.to_frame(name="foo") 2146 expected = Series(list("abc"), dtype="category", name="foo") 2147 tm.assert_series_equal(result["foo"], expected) 2148 2149 # list-like creation 2150 df = DataFrame(list("abc"), dtype="category") 2151 expected = Series(list("abc"), dtype="category", name=0) 2152 tm.assert_series_equal(df[0], expected) 2153 2154 # ndim != 1 2155 df = DataFrame([Categorical(list("abc"))]) 2156 expected = DataFrame({0: Series(list("abc"), dtype="category")}) 2157 tm.assert_frame_equal(df, expected) 2158 2159 df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) 2160 expected = DataFrame( 2161 { 2162 0: Series(list("abc"), dtype="category"), 2163 1: Series(list("abd"), dtype="category"), 2164 }, 2165 columns=[0, 1], 2166 ) 2167 tm.assert_frame_equal(df, expected) 2168 2169 # mixed 2170 df = DataFrame([Categorical(list("abc")), list("def")]) 2171 expected = DataFrame( 2172 {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] 2173 ) 2174 tm.assert_frame_equal(df, expected) 2175 2176 # invalid (shape) 2177 msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)" 2178 with pytest.raises(ValueError, match=msg): 2179 DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) 2180 2181 # ndim > 1 2182 msg = "> 1 ndim Categorical are not supported at this time" 2183 with pytest.raises(NotImplementedError, match=msg): 2184 Categorical(np.array([list("abcd")])) 2185 2186 def test_constructor_categorical_series(self): 2187 2188 items = [1, 2, 3, 1] 2189 exp = Series(items).astype("category") 2190 res = Series(items, dtype="category") 2191 tm.assert_series_equal(res, exp) 2192 2193 items = ["a", "b", "c", "a"] 2194 exp = Series(items).astype("category") 2195 res = Series(items, dtype="category") 2196 tm.assert_series_equal(res, exp) 2197 2198 # insert into frame with different index 2199 # GH 8076 2200 index = date_range("20000101", periods=3) 2201 expected = Series( 2202 Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]) 2203 ) 2204 expected.index = index 2205 2206 expected = DataFrame({"x": expected}) 2207 df = DataFrame({"x": Series(["a", "b", "c"], dtype="category")}, index=index) 2208 tm.assert_frame_equal(df, expected) 2209 2210 def test_from_records_to_records(self): 2211 # from numpy documentation 2212 arr = np.zeros((2,), dtype=("i4,f4,a10")) 2213 arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] 2214 2215 # TODO(wesm): unused 2216 frame = DataFrame.from_records(arr) # noqa 2217 2218 index = Index(np.arange(len(arr))[::-1]) 2219 indexed_frame = DataFrame.from_records(arr, index=index) 2220 tm.assert_index_equal(indexed_frame.index, index) 2221 2222 # without names, it should go to last ditch 2223 arr2 = np.zeros((2, 3)) 2224 tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) 2225 2226 # wrong length 2227 msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" 2228 with pytest.raises(ValueError, match=msg): 2229 DataFrame.from_records(arr, index=index[:-1]) 2230 2231 indexed_frame = DataFrame.from_records(arr, index="f1") 2232 2233 # what to do? 2234 records = indexed_frame.to_records() 2235 assert len(records.dtype.names) == 3 2236 2237 records = indexed_frame.to_records(index=False) 2238 assert len(records.dtype.names) == 2 2239 assert "index" not in records.dtype.names 2240 2241 def test_from_records_nones(self): 2242 tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)] 2243 2244 df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"]) 2245 assert np.isnan(df["c"][0]) 2246 2247 def test_from_records_iterator(self): 2248 arr = np.array( 2249 [(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)], 2250 dtype=[ 2251 ("x", np.float64), 2252 ("u", np.float32), 2253 ("y", np.int64), 2254 ("z", np.int32), 2255 ], 2256 ) 2257 df = DataFrame.from_records(iter(arr), nrows=2) 2258 xp = DataFrame( 2259 { 2260 "x": np.array([1.0, 3.0], dtype=np.float64), 2261 "u": np.array([1.0, 3.0], dtype=np.float32), 2262 "y": np.array([2, 4], dtype=np.int64), 2263 "z": np.array([2, 4], dtype=np.int32), 2264 } 2265 ) 2266 tm.assert_frame_equal(df.reindex_like(xp), xp) 2267 2268 # no dtypes specified here, so just compare with the default 2269 arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)] 2270 df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2) 2271 tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False) 2272 2273 def test_from_records_tuples_generator(self): 2274 def tuple_generator(length): 2275 for i in range(length): 2276 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 2277 yield (i, letters[i % len(letters)], i / length) 2278 2279 columns_names = ["Integer", "String", "Float"] 2280 columns = [ 2281 [i[j] for i in tuple_generator(10)] for j in range(len(columns_names)) 2282 ] 2283 data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]} 2284 expected = DataFrame(data, columns=columns_names) 2285 2286 generator = tuple_generator(10) 2287 result = DataFrame.from_records(generator, columns=columns_names) 2288 tm.assert_frame_equal(result, expected) 2289 2290 def test_from_records_lists_generator(self): 2291 def list_generator(length): 2292 for i in range(length): 2293 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 2294 yield [i, letters[i % len(letters)], i / length] 2295 2296 columns_names = ["Integer", "String", "Float"] 2297 columns = [ 2298 [i[j] for i in list_generator(10)] for j in range(len(columns_names)) 2299 ] 2300 data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]} 2301 expected = DataFrame(data, columns=columns_names) 2302 2303 generator = list_generator(10) 2304 result = DataFrame.from_records(generator, columns=columns_names) 2305 tm.assert_frame_equal(result, expected) 2306 2307 def test_from_records_columns_not_modified(self): 2308 tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)] 2309 2310 columns = ["a", "b", "c"] 2311 original_columns = list(columns) 2312 2313 df = DataFrame.from_records(tuples, columns=columns, index="a") # noqa 2314 2315 assert columns == original_columns 2316 2317 def test_from_records_decimal(self): 2318 from decimal import Decimal 2319 2320 tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)] 2321 2322 df = DataFrame.from_records(tuples, columns=["a"]) 2323 assert df["a"].dtype == object 2324 2325 df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True) 2326 assert df["a"].dtype == np.float64 2327 assert np.isnan(df["a"].values[-1]) 2328 2329 def test_from_records_duplicates(self): 2330 result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"]) 2331 2332 expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"]) 2333 2334 tm.assert_frame_equal(result, expected) 2335 2336 def test_from_records_set_index_name(self): 2337 def create_dict(order_id): 2338 return { 2339 "order_id": order_id, 2340 "quantity": np.random.randint(1, 10), 2341 "price": np.random.randint(1, 10), 2342 } 2343 2344 documents = [create_dict(i) for i in range(10)] 2345 # demo missing data 2346 documents.append({"order_id": 10, "quantity": 5}) 2347 2348 result = DataFrame.from_records(documents, index="order_id") 2349 assert result.index.name == "order_id" 2350 2351 # MultiIndex 2352 result = DataFrame.from_records(documents, index=["order_id", "quantity"]) 2353 assert result.index.names == ("order_id", "quantity") 2354 2355 def test_from_records_misc_brokenness(self): 2356 # #2179 2357 2358 data = {1: ["foo"], 2: ["bar"]} 2359 2360 result = DataFrame.from_records(data, columns=["a", "b"]) 2361 exp = DataFrame(data, columns=["a", "b"]) 2362 tm.assert_frame_equal(result, exp) 2363 2364 # overlap in index/index_names 2365 2366 data = {"a": [1, 2, 3], "b": [4, 5, 6]} 2367 2368 result = DataFrame.from_records(data, index=["a", "b", "c"]) 2369 exp = DataFrame(data, index=["a", "b", "c"]) 2370 tm.assert_frame_equal(result, exp) 2371 2372 # GH 2623 2373 rows = [] 2374 rows.append([datetime(2010, 1, 1), 1]) 2375 rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj 2376 df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) 2377 result = df2_obj.dtypes 2378 expected = Series( 2379 [np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"] 2380 ) 2381 tm.assert_series_equal(result, expected) 2382 2383 rows = [] 2384 rows.append([datetime(2010, 1, 1), 1]) 2385 rows.append([datetime(2010, 1, 2), 1]) 2386 df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) 2387 result = df2_obj.dtypes 2388 expected = Series( 2389 [np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"] 2390 ) 2391 tm.assert_series_equal(result, expected) 2392 2393 def test_from_records_empty(self): 2394 # 3562 2395 result = DataFrame.from_records([], columns=["a", "b", "c"]) 2396 expected = DataFrame(columns=["a", "b", "c"]) 2397 tm.assert_frame_equal(result, expected) 2398 2399 result = DataFrame.from_records([], columns=["a", "b", "b"]) 2400 expected = DataFrame(columns=["a", "b", "b"]) 2401 tm.assert_frame_equal(result, expected) 2402 2403 def test_from_records_empty_with_nonempty_fields_gh3682(self): 2404 a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)]) 2405 df = DataFrame.from_records(a, index="id") 2406 tm.assert_index_equal(df.index, Index([1], name="id")) 2407 assert df.index.name == "id" 2408 tm.assert_index_equal(df.columns, Index(["value"])) 2409 2410 b = np.array([], dtype=[("id", np.int64), ("value", np.int64)]) 2411 df = DataFrame.from_records(b, index="id") 2412 tm.assert_index_equal(df.index, Index([], name="id")) 2413 assert df.index.name == "id" 2414 2415 @pytest.mark.parametrize( 2416 "dtype", 2417 tm.ALL_INT_DTYPES 2418 + tm.ALL_EA_INT_DTYPES 2419 + tm.FLOAT_DTYPES 2420 + tm.COMPLEX_DTYPES 2421 + tm.DATETIME64_DTYPES 2422 + tm.TIMEDELTA64_DTYPES 2423 + tm.BOOL_DTYPES, 2424 ) 2425 def test_check_dtype_empty_numeric_column(self, dtype): 2426 # GH24386: Ensure dtypes are set correctly for an empty DataFrame. 2427 # Empty DataFrame is generated via dictionary data with non-overlapping columns. 2428 data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) 2429 2430 assert data.b.dtype == dtype 2431 2432 @pytest.mark.parametrize( 2433 "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES 2434 ) 2435 def test_check_dtype_empty_string_column(self, dtype): 2436 # GH24386: Ensure dtypes are set correctly for an empty DataFrame. 2437 # Empty DataFrame is generated via dictionary data with non-overlapping columns. 2438 data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) 2439 2440 assert data.b.dtype.name == "object" 2441 2442 def test_from_records_with_datetimes(self): 2443 2444 # this may fail on certain platforms because of a numpy issue 2445 # related GH6140 2446 if not is_platform_little_endian(): 2447 pytest.skip("known failure of test on non-little endian") 2448 2449 # construction with a null in a recarray 2450 # GH 6140 2451 expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]}) 2452 2453 arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] 2454 dtypes = [("EXPIRY", "<M8[ns]")] 2455 2456 try: 2457 recarray = np.core.records.fromarrays(arrdata, dtype=dtypes) 2458 except (ValueError): 2459 pytest.skip("known failure of numpy rec array creation") 2460 2461 result = DataFrame.from_records(recarray) 2462 tm.assert_frame_equal(result, expected) 2463 2464 # coercion should work too 2465 arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] 2466 dtypes = [("EXPIRY", "<M8[m]")] 2467 recarray = np.core.records.fromarrays(arrdata, dtype=dtypes) 2468 result = DataFrame.from_records(recarray) 2469 tm.assert_frame_equal(result, expected) 2470 2471 def test_from_records_sequencelike(self): 2472 df = DataFrame( 2473 { 2474 "A": np.array(np.random.randn(6), dtype=np.float64), 2475 "A1": np.array(np.random.randn(6), dtype=np.float64), 2476 "B": np.array(np.arange(6), dtype=np.int64), 2477 "C": ["foo"] * 6, 2478 "D": np.array([True, False] * 3, dtype=bool), 2479 "E": np.array(np.random.randn(6), dtype=np.float32), 2480 "E1": np.array(np.random.randn(6), dtype=np.float32), 2481 "F": np.array(np.arange(6), dtype=np.int32), 2482 } 2483 ) 2484 2485 # this is actually tricky to create the recordlike arrays and 2486 # have the dtypes be intact 2487 blocks = df._to_dict_of_blocks() 2488 tuples = [] 2489 columns = [] 2490 dtypes = [] 2491 for dtype, b in blocks.items(): 2492 columns.extend(b.columns) 2493 dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns]) 2494 for i in range(len(df.index)): 2495 tup = [] 2496 for _, b in blocks.items(): 2497 tup.extend(b.iloc[i].values) 2498 tuples.append(tuple(tup)) 2499 2500 recarray = np.array(tuples, dtype=dtypes).view(np.recarray) 2501 recarray2 = df.to_records() 2502 lists = [list(x) for x in tuples] 2503 2504 # tuples (lose the dtype info) 2505 result = DataFrame.from_records(tuples, columns=columns).reindex( 2506 columns=df.columns 2507 ) 2508 2509 # created recarray and with to_records recarray (have dtype info) 2510 result2 = DataFrame.from_records(recarray, columns=columns).reindex( 2511 columns=df.columns 2512 ) 2513 result3 = DataFrame.from_records(recarray2, columns=columns).reindex( 2514 columns=df.columns 2515 ) 2516 2517 # list of tupels (no dtype info) 2518 result4 = DataFrame.from_records(lists, columns=columns).reindex( 2519 columns=df.columns 2520 ) 2521 2522 tm.assert_frame_equal(result, df, check_dtype=False) 2523 tm.assert_frame_equal(result2, df) 2524 tm.assert_frame_equal(result3, df) 2525 tm.assert_frame_equal(result4, df, check_dtype=False) 2526 2527 # tuples is in the order of the columns 2528 result = DataFrame.from_records(tuples) 2529 tm.assert_index_equal(result.columns, RangeIndex(8)) 2530 2531 # test exclude parameter & we are casting the results here (as we don't 2532 # have dtype info to recover) 2533 columns_to_test = [columns.index("C"), columns.index("E1")] 2534 2535 exclude = list(set(range(8)) - set(columns_to_test)) 2536 result = DataFrame.from_records(tuples, exclude=exclude) 2537 result.columns = [columns[i] for i in sorted(columns_to_test)] 2538 tm.assert_series_equal(result["C"], df["C"]) 2539 tm.assert_series_equal(result["E1"], df["E1"].astype("float64")) 2540 2541 # empty case 2542 result = DataFrame.from_records([], columns=["foo", "bar", "baz"]) 2543 assert len(result) == 0 2544 tm.assert_index_equal(result.columns, Index(["foo", "bar", "baz"])) 2545 2546 result = DataFrame.from_records([]) 2547 assert len(result) == 0 2548 assert len(result.columns) == 0 2549 2550 def test_from_records_dictlike(self): 2551 2552 # test the dict methods 2553 df = DataFrame( 2554 { 2555 "A": np.array(np.random.randn(6), dtype=np.float64), 2556 "A1": np.array(np.random.randn(6), dtype=np.float64), 2557 "B": np.array(np.arange(6), dtype=np.int64), 2558 "C": ["foo"] * 6, 2559 "D": np.array([True, False] * 3, dtype=bool), 2560 "E": np.array(np.random.randn(6), dtype=np.float32), 2561 "E1": np.array(np.random.randn(6), dtype=np.float32), 2562 "F": np.array(np.arange(6), dtype=np.int32), 2563 } 2564 ) 2565 2566 # columns is in a different order here than the actual items iterated 2567 # from the dict 2568 blocks = df._to_dict_of_blocks() 2569 columns = [] 2570 for dtype, b in blocks.items(): 2571 columns.extend(b.columns) 2572 2573 asdict = {x: y for x, y in df.items()} 2574 asdict2 = {x: y.values for x, y in df.items()} 2575 2576 # dict of series & dict of ndarrays (have dtype info) 2577 results = [] 2578 results.append(DataFrame.from_records(asdict).reindex(columns=df.columns)) 2579 results.append( 2580 DataFrame.from_records(asdict, columns=columns).reindex(columns=df.columns) 2581 ) 2582 results.append( 2583 DataFrame.from_records(asdict2, columns=columns).reindex(columns=df.columns) 2584 ) 2585 2586 for r in results: 2587 tm.assert_frame_equal(r, df) 2588 2589 def test_from_records_with_index_data(self): 2590 df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) 2591 2592 data = np.random.randn(10) 2593 df1 = DataFrame.from_records(df, index=data) 2594 tm.assert_index_equal(df1.index, Index(data)) 2595 2596 def test_from_records_bad_index_column(self): 2597 df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) 2598 2599 # should pass 2600 df1 = DataFrame.from_records(df, index=["C"]) 2601 tm.assert_index_equal(df1.index, Index(df.C)) 2602 2603 df1 = DataFrame.from_records(df, index="C") 2604 tm.assert_index_equal(df1.index, Index(df.C)) 2605 2606 # should fail 2607 msg = r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)" 2608 with pytest.raises(ValueError, match=msg): 2609 DataFrame.from_records(df, index=[2]) 2610 with pytest.raises(KeyError, match=r"^2$"): 2611 DataFrame.from_records(df, index=2) 2612 2613 def test_from_records_non_tuple(self): 2614 class Record: 2615 def __init__(self, *args): 2616 self.args = args 2617 2618 def __getitem__(self, i): 2619 return self.args[i] 2620 2621 def __iter__(self): 2622 return iter(self.args) 2623 2624 recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)] 2625 tups = [tuple(rec) for rec in recs] 2626 2627 result = DataFrame.from_records(recs) 2628 expected = DataFrame.from_records(tups) 2629 tm.assert_frame_equal(result, expected) 2630 2631 def test_from_records_len0_with_columns(self): 2632 # #2633 2633 result = DataFrame.from_records([], index="foo", columns=["foo", "bar"]) 2634 expected = Index(["bar"]) 2635 2636 assert len(result) == 0 2637 assert result.index.name == "foo" 2638 tm.assert_index_equal(result.columns, expected) 2639 2640 def test_from_records_series_list_dict(self): 2641 # GH27358 2642 expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T 2643 data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]]) 2644 result = DataFrame.from_records(data) 2645 tm.assert_frame_equal(result, expected) 2646 2647 def test_from_records_series_categorical_index(self): 2648 # GH 32805 2649 index = CategoricalIndex( 2650 [Interval(-20, -10), Interval(-10, 0), Interval(0, 10)] 2651 ) 2652 series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index) 2653 frame = DataFrame.from_records(series_of_dicts, index=index) 2654 expected = DataFrame( 2655 {"a": [1, 2, np.NaN], "b": [np.NaN, np.NaN, 3]}, index=index 2656 ) 2657 tm.assert_frame_equal(frame, expected) 2658 2659 def test_frame_from_records_utc(self): 2660 rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)} 2661 2662 # it works 2663 DataFrame.from_records([rec], index="begin_time") 2664 2665 def test_to_frame_with_falsey_names(self): 2666 # GH 16114 2667 result = Series(name=0, dtype=object).to_frame().dtypes 2668 expected = Series({0: object}) 2669 tm.assert_series_equal(result, expected) 2670 2671 result = DataFrame(Series(name=0, dtype=object)).dtypes 2672 tm.assert_series_equal(result, expected) 2673 2674 @pytest.mark.arm_slow 2675 @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) 2676 def test_constructor_range_dtype(self, dtype): 2677 expected = DataFrame({"A": [0, 1, 2, 3, 4]}, dtype=dtype or "int64") 2678 2679 # GH 26342 2680 result = DataFrame(range(5), columns=["A"], dtype=dtype) 2681 tm.assert_frame_equal(result, expected) 2682 2683 # GH 16804 2684 result = DataFrame({"A": range(5)}, dtype=dtype) 2685 tm.assert_frame_equal(result, expected) 2686 2687 def test_frame_from_list_subclass(self): 2688 # GH21226 2689 class List(list): 2690 pass 2691 2692 expected = DataFrame([[1, 2, 3], [4, 5, 6]]) 2693 result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])])) 2694 tm.assert_frame_equal(result, expected) 2695 2696 @pytest.mark.parametrize( 2697 "extension_arr", 2698 [ 2699 Categorical(list("aabbc")), 2700 SparseArray([1, np.nan, np.nan, np.nan]), 2701 IntervalArray([Interval(0, 1), Interval(1, 5)]), 2702 PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")), 2703 ], 2704 ) 2705 def test_constructor_with_extension_array(self, extension_arr): 2706 # GH11363 2707 expected = DataFrame(Series(extension_arr)) 2708 result = DataFrame(extension_arr) 2709 tm.assert_frame_equal(result, expected) 2710 2711 def test_datetime_date_tuple_columns_from_dict(self): 2712 # GH 10863 2713 v = date.today() 2714 tup = v, v 2715 result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) 2716 expected = DataFrame([0, 1, 2], columns=Index(Series([tup]))) 2717 tm.assert_frame_equal(result, expected) 2718 2719 def test_construct_with_two_categoricalindex_series(self): 2720 # GH 14600 2721 s1 = Series([39, 6, 4], index=CategoricalIndex(["female", "male", "unknown"])) 2722 s2 = Series( 2723 [2, 152, 2, 242, 150], 2724 index=CategoricalIndex(["f", "female", "m", "male", "unknown"]), 2725 ) 2726 result = DataFrame([s1, s2]) 2727 expected = DataFrame( 2728 np.array( 2729 [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]] 2730 ), 2731 columns=["f", "female", "m", "male", "unknown"], 2732 ) 2733 tm.assert_frame_equal(result, expected) 2734 2735 def test_from_M8_structured(self): 2736 dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] 2737 arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")]) 2738 df = DataFrame(arr) 2739 2740 assert df["Date"][0] == dates[0][0] 2741 assert df["Forecasting"][0] == dates[0][1] 2742 2743 s = Series(arr["Date"]) 2744 assert isinstance(s[0], Timestamp) 2745 assert s[0] == dates[0][0] 2746 2747 def test_from_datetime_subclass(self): 2748 # GH21142 Verify whether Datetime subclasses are also of dtype datetime 2749 class DatetimeSubclass(datetime): 2750 pass 2751 2752 data = DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]}) 2753 assert data.datetime.dtype == "datetime64[ns]" 2754 2755 def test_with_mismatched_index_length_raises(self): 2756 # GH#33437 2757 dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") 2758 with pytest.raises(ValueError, match="Shape of passed values"): 2759 DataFrame(dti, index=range(4)) 2760 2761 def test_frame_ctor_datetime64_column(self): 2762 rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") 2763 dates = np.asarray(rng) 2764 2765 df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) 2766 assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) 2767 2768 def test_dataframe_constructor_infer_multiindex(self): 2769 index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] 2770 2771 multi = DataFrame( 2772 np.random.randn(4, 4), 2773 index=[np.array(x) for x in index_lists], 2774 ) 2775 assert isinstance(multi.index, MultiIndex) 2776 assert not isinstance(multi.columns, MultiIndex) 2777 2778 multi = DataFrame(np.random.randn(4, 4), columns=index_lists) 2779 assert isinstance(multi.columns, MultiIndex) 2780 2781 @pytest.mark.parametrize( 2782 "input_vals", 2783 [ 2784 ([1, 2]), 2785 (["1", "2"]), 2786 (list(date_range("1/1/2011", periods=2, freq="H"))), 2787 (list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), 2788 ([Interval(left=0, right=5)]), 2789 ], 2790 ) 2791 def test_constructor_list_str(self, input_vals, string_dtype): 2792 # GH#16605 2793 # Ensure that data elements are converted to strings when 2794 # dtype is str, 'str', or 'U' 2795 2796 result = DataFrame({"A": input_vals}, dtype=string_dtype) 2797 expected = DataFrame({"A": input_vals}).astype({"A": string_dtype}) 2798 tm.assert_frame_equal(result, expected) 2799 2800 def test_constructor_list_str_na(self, string_dtype): 2801 2802 result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype) 2803 expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) 2804 tm.assert_frame_equal(result, expected) 2805 2806 2807class TestDataFrameConstructorWithDatetimeTZ: 2808 @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) 2809 def test_construction_preserves_tzaware_dtypes(self, tz): 2810 # after GH#7822 2811 # these retain the timezones on dict construction 2812 dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") 2813 dr_tz = dr.tz_localize(tz) 2814 df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) 2815 tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) 2816 assert df["B"].dtype == tz_expected 2817 2818 # GH#2810 (with timezones) 2819 datetimes_naive = [ts.to_pydatetime() for ts in dr] 2820 datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] 2821 df = DataFrame({"dr": dr}) 2822 df["dr_tz"] = dr_tz 2823 df["datetimes_naive"] = datetimes_naive 2824 df["datetimes_with_tz"] = datetimes_with_tz 2825 result = df.dtypes 2826 expected = Series( 2827 [ 2828 np.dtype("datetime64[ns]"), 2829 DatetimeTZDtype(tz=tz), 2830 np.dtype("datetime64[ns]"), 2831 DatetimeTZDtype(tz=tz), 2832 ], 2833 index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], 2834 ) 2835 tm.assert_series_equal(result, expected) 2836 2837 def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): 2838 # GH#25843 2839 tz = tz_aware_fixture 2840 result = DataFrame({"d": [Timestamp("2019", tz=tz)]}, dtype="datetime64[ns]") 2841 expected = DataFrame({"d": [Timestamp("2019")]}) 2842 tm.assert_frame_equal(result, expected) 2843 2844 def test_from_dict(self): 2845 2846 # 8260 2847 # support datetime64 with tz 2848 2849 idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") 2850 dr = date_range("20130110", periods=3) 2851 2852 # construction 2853 df = DataFrame({"A": idx, "B": dr}) 2854 assert df["A"].dtype, "M8[ns, US/Eastern" 2855 assert df["A"].name == "A" 2856 tm.assert_series_equal(df["A"], Series(idx, name="A")) 2857 tm.assert_series_equal(df["B"], Series(dr, name="B")) 2858 2859 def test_from_index(self): 2860 2861 # from index 2862 idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo") 2863 df2 = DataFrame(idx2) 2864 tm.assert_series_equal(df2["foo"], Series(idx2, name="foo")) 2865 df2 = DataFrame(Series(idx2)) 2866 tm.assert_series_equal(df2["foo"], Series(idx2, name="foo")) 2867 2868 idx2 = date_range("20130101", periods=3, tz="US/Eastern") 2869 df2 = DataFrame(idx2) 2870 tm.assert_series_equal(df2[0], Series(idx2, name=0)) 2871 df2 = DataFrame(Series(idx2)) 2872 tm.assert_series_equal(df2[0], Series(idx2, name=0)) 2873 2874 def test_frame_dict_constructor_datetime64_1680(self): 2875 dr = date_range("1/1/2012", periods=10) 2876 s = Series(dr, index=dr) 2877 2878 # it works! 2879 DataFrame({"a": "foo", "b": s}, index=dr) 2880 DataFrame({"a": "foo", "b": s.values}, index=dr) 2881 2882 def test_frame_datetime64_mixed_index_ctor_1681(self): 2883 dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") 2884 ts = Series(dr) 2885 2886 # it works! 2887 d = DataFrame({"A": "foo", "B": ts}, index=dr) 2888 assert d["B"].isna().all() 2889 2890 def test_frame_timeseries_to_records(self): 2891 index = date_range("1/1/2000", periods=10) 2892 df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) 2893 2894 result = df.to_records() 2895 result["index"].dtype == "M8[ns]" 2896 2897 result = df.to_records(index=False) 2898 2899 def test_frame_timeseries_column(self): 2900 # GH19157 2901 dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern") 2902 result = DataFrame(dr, columns=["timestamps"]) 2903 expected = DataFrame( 2904 { 2905 "timestamps": [ 2906 Timestamp("20130101T10:00:00", tz="US/Eastern"), 2907 Timestamp("20130101T10:01:00", tz="US/Eastern"), 2908 Timestamp("20130101T10:02:00", tz="US/Eastern"), 2909 ] 2910 } 2911 ) 2912 tm.assert_frame_equal(result, expected) 2913 2914 def test_nested_dict_construction(self): 2915 # GH22227 2916 columns = ["Nevada", "Ohio"] 2917 pop = { 2918 "Nevada": {2001: 2.4, 2002: 2.9}, 2919 "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6}, 2920 } 2921 result = DataFrame(pop, index=[2001, 2002, 2003], columns=columns) 2922 expected = DataFrame( 2923 [(2.4, 1.7), (2.9, 3.6), (np.nan, np.nan)], 2924 columns=columns, 2925 index=Index([2001, 2002, 2003]), 2926 ) 2927 tm.assert_frame_equal(result, expected) 2928 2929 def test_from_tzaware_object_array(self): 2930 # GH#26825 2D object array of tzaware timestamps should not raise 2931 dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") 2932 data = dti._data.astype(object).reshape(1, -1) 2933 df = DataFrame(data) 2934 assert df.shape == (1, 3) 2935 assert (df.dtypes == dti.dtype).all() 2936 assert (df == dti).all().all() 2937 2938 def test_from_tzaware_mixed_object_array(self): 2939 # GH#26825 2940 arr = np.array( 2941 [ 2942 [ 2943 Timestamp("2013-01-01 00:00:00"), 2944 Timestamp("2013-01-02 00:00:00"), 2945 Timestamp("2013-01-03 00:00:00"), 2946 ], 2947 [ 2948 Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), 2949 pd.NaT, 2950 Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), 2951 ], 2952 [ 2953 Timestamp("2013-01-01 00:00:00+0100", tz="CET"), 2954 pd.NaT, 2955 Timestamp("2013-01-03 00:00:00+0100", tz="CET"), 2956 ], 2957 ], 2958 dtype=object, 2959 ).T 2960 res = DataFrame(arr, columns=["A", "B", "C"]) 2961 2962 expected_dtypes = [ 2963 "datetime64[ns]", 2964 "datetime64[ns, US/Eastern]", 2965 "datetime64[ns, CET]", 2966 ] 2967 assert (res.dtypes == expected_dtypes).all() 2968 2969 def test_from_2d_ndarray_with_dtype(self): 2970 # GH#12513 2971 array_dim2 = np.arange(10).reshape((5, 2)) 2972 df = DataFrame(array_dim2, dtype="datetime64[ns, UTC]") 2973 2974 expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") 2975 tm.assert_frame_equal(df, expected) 2976 2977 def test_construction_from_set_raises(self): 2978 # https://github.com/pandas-dev/pandas/issues/32582 2979 msg = "Set type is unordered" 2980 with pytest.raises(TypeError, match=msg): 2981 DataFrame({"a": {1, 2, 3}}) 2982