1from collections import OrderedDict, abc
2from datetime import date, datetime, timedelta
3import functools
4import itertools
5import re
6
7import numpy as np
8import numpy.ma as ma
9import numpy.ma.mrecords as mrecords
10import pytest
11import pytz
12
13from pandas.compat import is_platform_little_endian
14from pandas.compat.numpy import _np_version_under1p19
15
16from pandas.core.dtypes.common import is_integer_dtype
17from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
18
19import pandas as pd
20from pandas import (
21    Categorical,
22    CategoricalIndex,
23    DataFrame,
24    Index,
25    Interval,
26    MultiIndex,
27    Period,
28    RangeIndex,
29    Series,
30    Timedelta,
31    Timestamp,
32    date_range,
33    isna,
34)
35import pandas._testing as tm
36from pandas.arrays import IntervalArray, PeriodArray, SparseArray
37from pandas.core.construction import create_series_with_explicit_dtype
38
# Float widths used to build frames whose columns mix several float dtypes.
MIXED_FLOAT_DTYPES = [f"float{bits}" for bits in (16, 32, 64)]
# Unsigned widths first, then signed ones, each in increasing bit width.
MIXED_INT_DTYPES = [
    f"{kind}{bits}" for kind in ("uint", "int") for bits in (8, 16, 32, 64)
]
50
51
52class TestDataFrameConstructors:
53    def test_series_with_name_not_matching_column(self):
54        # GH#9232
55        x = Series(range(5), name=1)
56        y = Series(range(5), name=0)
57
58        result = DataFrame(x, columns=[0])
59        expected = DataFrame([], columns=[0])
60        tm.assert_frame_equal(result, expected)
61
62        result = DataFrame(y, columns=[1])
63        expected = DataFrame([], columns=[1])
64        tm.assert_frame_equal(result, expected)
65
66    @pytest.mark.parametrize(
67        "constructor",
68        [
69            lambda: DataFrame(),
70            lambda: DataFrame(None),
71            lambda: DataFrame({}),
72            lambda: DataFrame(()),
73            lambda: DataFrame([]),
74            lambda: DataFrame(_ for _ in []),
75            lambda: DataFrame(range(0)),
76            lambda: DataFrame(data=None),
77            lambda: DataFrame(data={}),
78            lambda: DataFrame(data=()),
79            lambda: DataFrame(data=[]),
80            lambda: DataFrame(data=(_ for _ in [])),
81            lambda: DataFrame(data=range(0)),
82        ],
83    )
84    def test_empty_constructor(self, constructor):
85        expected = DataFrame()
86        result = constructor()
87        assert len(result.index) == 0
88        assert len(result.columns) == 0
89        tm.assert_frame_equal(result, expected)
90
91    @pytest.mark.parametrize(
92        "emptylike,expected_index,expected_columns",
93        [
94            ([[]], RangeIndex(1), RangeIndex(0)),
95            ([[], []], RangeIndex(2), RangeIndex(0)),
96            ([(_ for _ in [])], RangeIndex(1), RangeIndex(0)),
97        ],
98    )
99    def test_emptylike_constructor(self, emptylike, expected_index, expected_columns):
100        expected = DataFrame(index=expected_index, columns=expected_columns)
101        result = DataFrame(emptylike)
102        tm.assert_frame_equal(result, expected)
103
104    def test_constructor_mixed(self, float_string_frame):
105        index, data = tm.getMixedTypeDict()
106
107        # TODO(wesm), incomplete test?
108        indexed_frame = DataFrame(data, index=index)  # noqa
109        unindexed_frame = DataFrame(data)  # noqa
110
111        assert float_string_frame["foo"].dtype == np.object_
112
113    def test_constructor_cast_failure(self):
114        foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)
115        assert foo["a"].dtype == object
116
117        # GH 3010, constructing with odd arrays
118        df = DataFrame(np.ones((4, 2)))
119
120        # this is ok
121        df["foo"] = np.ones((4, 2)).tolist()
122
123        # this is not ok
124        msg = "Wrong number of items passed 2, placement implies 1"
125        with pytest.raises(ValueError, match=msg):
126            df["test"] = np.ones((4, 2))
127
128        # this is ok
129        df["foo2"] = np.ones((4, 2)).tolist()
130
131    def test_constructor_dtype_copy(self):
132        orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]})
133
134        new_df = DataFrame(orig_df, dtype=float, copy=True)
135
136        new_df["col1"] = 200.0
137        assert orig_df["col1"][0] == 1.0
138
139    def test_constructor_dtype_nocast_view(self):
140        df = DataFrame([[1, 2]])
141        should_be_view = DataFrame(df, dtype=df[0].dtype)
142        should_be_view[0][0] = 99
143        assert df.values[0, 0] == 99
144
145        should_be_view = DataFrame(df.values, dtype=df[0].dtype)
146        should_be_view[0][0] = 97
147        assert df.values[0, 0] == 97
148
149    def test_constructor_dtype_list_data(self):
150        df = DataFrame([[1, "2"], [None, "a"]], dtype=object)
151        assert df.loc[1, 0] is None
152        assert df.loc[0, 1] == "2"
153
154    @pytest.mark.skipif(_np_version_under1p19, reason="NumPy change.")
155    def test_constructor_list_of_2d_raises(self):
156        # https://github.com/pandas-dev/pandas/issues/32289
157        a = DataFrame()
158        b = np.empty((0, 0))
159        with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"):
160            DataFrame([a])
161
162        with pytest.raises(ValueError, match=r"shape=\(1, 0, 0\)"):
163            DataFrame([b])
164
165        a = DataFrame({"A": [1, 2]})
166        with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"):
167            DataFrame([a, a])
168
169    def test_constructor_mixed_dtypes(self):
170        def _make_mixed_dtypes_df(typ, ad=None):
171
172            if typ == "int":
173                dtypes = MIXED_INT_DTYPES
174                arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes]
175            elif typ == "float":
176                dtypes = MIXED_FLOAT_DTYPES
177                arrays = [
178                    np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes
179                ]
180
181            for d, a in zip(dtypes, arrays):
182                assert a.dtype == d
183            if ad is None:
184                ad = {}
185            ad.update({d: a for d, a in zip(dtypes, arrays)})
186            return DataFrame(ad)
187
188        def _check_mixed_dtypes(df, dtypes=None):
189            if dtypes is None:
190                dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES
191            for d in dtypes:
192                if d in df:
193                    assert df.dtypes[d] == d
194
195        # mixed floating and integer coexist in the same frame
196        df = _make_mixed_dtypes_df("float")
197        _check_mixed_dtypes(df)
198
199        # add lots of types
200        df = _make_mixed_dtypes_df("float", {"A": 1, "B": "foo", "C": "bar"})
201        _check_mixed_dtypes(df)
202
203        # GH 622
204        df = _make_mixed_dtypes_df("int")
205        _check_mixed_dtypes(df)
206
207    def test_constructor_complex_dtypes(self):
208        # GH10952
209        a = np.random.rand(10).astype(np.complex64)
210        b = np.random.rand(10).astype(np.complex128)
211
212        df = DataFrame({"a": a, "b": b})
213        assert a.dtype == df.a.dtype
214        assert b.dtype == df.b.dtype
215
216    def test_constructor_dtype_str_na_values(self, string_dtype):
217        # https://github.com/pandas-dev/pandas/issues/21083
218        df = DataFrame({"A": ["x", None]}, dtype=string_dtype)
219        result = df.isna()
220        expected = DataFrame({"A": [False, True]})
221        tm.assert_frame_equal(result, expected)
222        assert df.iloc[1, 0] is None
223
224        df = DataFrame({"A": ["x", np.nan]}, dtype=string_dtype)
225        assert np.isnan(df.iloc[1, 0])
226
227    def test_constructor_rec(self, float_frame):
228        rec = float_frame.to_records(index=False)
229        rec.dtype.names = list(rec.dtype.names)[::-1]
230
231        index = float_frame.index
232
233        df = DataFrame(rec)
234        tm.assert_index_equal(df.columns, Index(rec.dtype.names))
235
236        df2 = DataFrame(rec, index=index)
237        tm.assert_index_equal(df2.columns, Index(rec.dtype.names))
238        tm.assert_index_equal(df2.index, index)
239
240        rng = np.arange(len(rec))[::-1]
241        df3 = DataFrame(rec, index=rng, columns=["C", "B"])
242        expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"])
243        tm.assert_frame_equal(df3, expected)
244
245    def test_constructor_bool(self):
246        df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)})
247        assert df.values.dtype == np.bool_
248
249    def test_constructor_overflow_int64(self):
250        # see gh-14881
251        values = np.array([2 ** 64 - i for i in range(1, 10)], dtype=np.uint64)
252
253        result = DataFrame({"a": values})
254        assert result["a"].dtype == np.uint64
255
256        # see gh-2355
257        data_scores = [
258            (6311132704823138710, 273),
259            (2685045978526272070, 23),
260            (8921811264899370420, 45),
261            (17019687244989530680, 270),
262            (9930107427299601010, 273),
263        ]
264        dtype = [("uid", "u8"), ("score", "u8")]
265        data = np.zeros((len(data_scores),), dtype=dtype)
266        data[:] = data_scores
267        df_crawls = DataFrame(data)
268        assert df_crawls["uid"].dtype == np.uint64
269
270    @pytest.mark.parametrize(
271        "values",
272        [
273            np.array([2 ** 64], dtype=object),
274            np.array([2 ** 65]),
275            [2 ** 64 + 1],
276            np.array([-(2 ** 63) - 4], dtype=object),
277            np.array([-(2 ** 64) - 1]),
278            [-(2 ** 65) - 2],
279        ],
280    )
281    def test_constructor_int_overflow(self, values):
282        # see gh-18584
283        value = values[0]
284        result = DataFrame(values)
285
286        assert result[0].dtype == object
287        assert result[0][0] == value
288
289    def test_constructor_ordereddict(self):
290        import random
291
292        nitems = 100
293        nums = list(range(nitems))
294        random.shuffle(nums)
295        expected = [f"A{i:d}" for i in nums]
296        df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems)))
297        assert expected == list(df.columns)
298
299    def test_constructor_dict(self):
300        datetime_series = tm.makeTimeSeries(nper=30)
301        # test expects index shifted by 5
302        datetime_series_short = tm.makeTimeSeries(nper=30)[5:]
303
304        frame = DataFrame({"col1": datetime_series, "col2": datetime_series_short})
305
306        # col2 is padded with NaN
307        assert len(datetime_series) == 30
308        assert len(datetime_series_short) == 25
309
310        tm.assert_series_equal(frame["col1"], datetime_series.rename("col1"))
311
312        exp = Series(
313            np.concatenate([[np.nan] * 5, datetime_series_short.values]),
314            index=datetime_series.index,
315            name="col2",
316        )
317        tm.assert_series_equal(exp, frame["col2"])
318
319        frame = DataFrame(
320            {"col1": datetime_series, "col2": datetime_series_short},
321            columns=["col2", "col3", "col4"],
322        )
323
324        assert len(frame) == len(datetime_series_short)
325        assert "col1" not in frame
326        assert isna(frame["col3"]).all()
327
328        # Corner cases
329        assert len(DataFrame()) == 0
330
331        # mix dict and array, wrong size - no spec for which error should raise
332        # first
333        msg = "Mixing dicts with non-Series may lead to ambiguous ordering."
334        with pytest.raises(ValueError, match=msg):
335            DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]})
336
337        # Length-one dict micro-optimization
338        frame = DataFrame({"A": {"1": 1, "2": 2}})
339        tm.assert_index_equal(frame.index, Index(["1", "2"]))
340
341        # empty dict plus index
342        idx = Index([0, 1, 2])
343        frame = DataFrame({}, index=idx)
344        assert frame.index is idx
345
346        # empty dict with index and columns
347        idx = Index([0, 1, 2])
348        frame = DataFrame({}, index=idx, columns=idx)
349        assert frame.index is idx
350        assert frame.columns is idx
351        assert len(frame._series) == 3
352
353        # with dict of empty list and Series
354        frame = DataFrame({"A": [], "B": []}, columns=["A", "B"])
355        tm.assert_index_equal(frame.index, RangeIndex(0), exact=True)
356
357        # GH 14381
358        # Dict with None value
359        frame_none = DataFrame({"a": None}, index=[0])
360        frame_none_list = DataFrame({"a": [None]}, index=[0])
361        assert frame_none._get_value(0, "a") is None
362        assert frame_none_list._get_value(0, "a") is None
363        tm.assert_frame_equal(frame_none, frame_none_list)
364
365        # GH10856
366        # dict with scalar values should raise error, even if columns passed
367        msg = "If using all scalar values, you must pass an index"
368        with pytest.raises(ValueError, match=msg):
369            DataFrame({"a": 0.7})
370
371        with pytest.raises(ValueError, match=msg):
372            DataFrame({"a": 0.7}, columns=["a"])
373
374    @pytest.mark.parametrize("scalar", [2, np.nan, None, "D"])
375    def test_constructor_invalid_items_unused(self, scalar):
376        # No error if invalid (scalar) value is in fact not used:
377        result = DataFrame({"a": scalar}, columns=["b"])
378        expected = DataFrame(columns=["b"])
379        tm.assert_frame_equal(result, expected)
380
381    @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")])
382    def test_constructor_dict_nan_key(self, value):
383        # GH 18455
384        cols = [1, value, 3]
385        idx = ["a", value]
386        values = [[0, 3], [1, 4], [2, 5]]
387        data = {cols[c]: Series(values[c], index=idx) for c in range(3)}
388        result = DataFrame(data).sort_values(1).sort_values("a", axis=1)
389        expected = DataFrame(
390            np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols
391        )
392        tm.assert_frame_equal(result, expected)
393
394        result = DataFrame(data, index=idx).sort_values("a", axis=1)
395        tm.assert_frame_equal(result, expected)
396
397        result = DataFrame(data, index=idx, columns=cols)
398        tm.assert_frame_equal(result, expected)
399
400    @pytest.mark.parametrize("value", [np.nan, None, float("nan")])
401    def test_constructor_dict_nan_tuple_key(self, value):
402        # GH 18455
403        cols = Index([(11, 21), (value, 22), (13, value)])
404        idx = Index([("a", value), (value, 2)])
405        values = [[0, 3], [1, 4], [2, 5]]
406        data = {cols[c]: Series(values[c], index=idx) for c in range(3)}
407        result = DataFrame(data).sort_values((11, 21)).sort_values(("a", value), axis=1)
408        expected = DataFrame(
409            np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols
410        )
411        tm.assert_frame_equal(result, expected)
412
413        result = DataFrame(data, index=idx).sort_values(("a", value), axis=1)
414        tm.assert_frame_equal(result, expected)
415
416        result = DataFrame(data, index=idx, columns=cols)
417        tm.assert_frame_equal(result, expected)
418
419    def test_constructor_dict_order_insertion(self):
420        datetime_series = tm.makeTimeSeries(nper=30)
421        datetime_series_short = tm.makeTimeSeries(nper=25)
422
423        # GH19018
424        # initialization ordering: by insertion order if python>= 3.6
425        d = {"b": datetime_series_short, "a": datetime_series}
426        frame = DataFrame(data=d)
427        expected = DataFrame(data=d, columns=list("ba"))
428        tm.assert_frame_equal(frame, expected)
429
430    def test_constructor_dict_nan_key_and_columns(self):
431        # GH 16894
432        result = DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2])
433        expected = DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2])
434        tm.assert_frame_equal(result, expected)
435
436    def test_constructor_multi_index(self):
437        # GH 4078
438        # construction error with mi and all-nan frame
439        tuples = [(2, 3), (3, 3), (3, 3)]
440        mi = MultiIndex.from_tuples(tuples)
441        df = DataFrame(index=mi, columns=mi)
442        assert isna(df).values.ravel().all()
443
444        tuples = [(3, 3), (2, 3), (3, 3)]
445        mi = MultiIndex.from_tuples(tuples)
446        df = DataFrame(index=mi, columns=mi)
447        assert isna(df).values.ravel().all()
448
449    def test_constructor_2d_index(self):
450        # GH 25416
451        # handling of 2d index in construction
452        df = DataFrame([[1]], columns=[[1]], index=[1, 2])
453        expected = DataFrame(
454            [1, 1],
455            index=pd.Int64Index([1, 2], dtype="int64"),
456            columns=MultiIndex(levels=[[1]], codes=[[0]]),
457        )
458        tm.assert_frame_equal(df, expected)
459
460        df = DataFrame([[1]], columns=[[1]], index=[[1, 2]])
461        expected = DataFrame(
462            [1, 1],
463            index=MultiIndex(levels=[[1, 2]], codes=[[0, 1]]),
464            columns=MultiIndex(levels=[[1]], codes=[[0]]),
465        )
466        tm.assert_frame_equal(df, expected)
467
468    def test_constructor_error_msgs(self):
469        msg = "Empty data passed with indices specified."
470        # passing an empty array with columns specified.
471        with pytest.raises(ValueError, match=msg):
472            DataFrame(np.empty(0), columns=list("abc"))
473
474        msg = "Mixing dicts with non-Series may lead to ambiguous ordering."
475        # mix dict and array, wrong size
476        with pytest.raises(ValueError, match=msg):
477            DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]})
478
479        # wrong size ndarray, GH 3105
480        msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)"
481        with pytest.raises(ValueError, match=msg):
482            DataFrame(
483                np.arange(12).reshape((4, 3)),
484                columns=["foo", "bar", "baz"],
485                index=date_range("2000-01-01", periods=3),
486            )
487
488        arr = np.array([[4, 5, 6]])
489        msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)"
490        with pytest.raises(ValueError, match=msg):
491            DataFrame(index=[0], columns=range(0, 4), data=arr)
492
493        arr = np.array([4, 5, 6])
494        msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)"
495        with pytest.raises(ValueError, match=msg):
496            DataFrame(index=[0], columns=range(0, 4), data=arr)
497
498        # higher dim raise exception
499        with pytest.raises(ValueError, match="Must pass 2-d input"):
500            DataFrame(np.zeros((3, 3, 3)), columns=["A", "B", "C"], index=[1])
501
502        # wrong size axis labels
503        msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
504        with pytest.raises(ValueError, match=msg):
505            DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1])
506
507        msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)"
508        with pytest.raises(ValueError, match=msg):
509            DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2])
510
511        # gh-26429
512        msg = "2 columns passed, passed data had 10 columns"
513        with pytest.raises(ValueError, match=msg):
514            DataFrame((range(10), range(10, 20)), columns=("ones", "twos"))
515
516        msg = "If using all scalar values, you must pass an index"
517        with pytest.raises(ValueError, match=msg):
518            DataFrame({"a": False, "b": True})
519
520    def test_constructor_subclass_dict(self, float_frame, dict_subclass):
521        # Test for passing dict subclass to constructor
522        data = {
523            "col1": dict_subclass((x, 10.0 * x) for x in range(10)),
524            "col2": dict_subclass((x, 20.0 * x) for x in range(10)),
525        }
526        df = DataFrame(data)
527        refdf = DataFrame({col: dict(val.items()) for col, val in data.items()})
528        tm.assert_frame_equal(refdf, df)
529
530        data = dict_subclass(data.items())
531        df = DataFrame(data)
532        tm.assert_frame_equal(refdf, df)
533
534        # try with defaultdict
535        from collections import defaultdict
536
537        data = {}
538        float_frame["B"][:10] = np.nan
539        for k, v in float_frame.items():
540            dct = defaultdict(dict)
541            dct.update(v.to_dict())
542            data[k] = dct
543        frame = DataFrame(data)
544        expected = frame.reindex(index=float_frame.index)
545        tm.assert_frame_equal(float_frame, expected)
546
547    def test_constructor_dict_block(self):
548        expected = np.array([[4.0, 3.0, 2.0, 1.0]])
549        df = DataFrame(
550            {"d": [4.0], "c": [3.0], "b": [2.0], "a": [1.0]},
551            columns=["d", "c", "b", "a"],
552        )
553        tm.assert_numpy_array_equal(df.values, expected)
554
555    def test_constructor_dict_cast(self):
556        # cast float tests
557        test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
558        frame = DataFrame(test_data, dtype=float)
559        assert len(frame) == 3
560        assert frame["B"].dtype == np.float64
561        assert frame["A"].dtype == np.float64
562
563        frame = DataFrame(test_data)
564        assert len(frame) == 3
565        assert frame["B"].dtype == np.object_
566        assert frame["A"].dtype == np.float64
567
568        # can't cast to float
569        test_data = {
570            "A": dict(zip(range(20), tm.makeStringIndex(20))),
571            "B": dict(zip(range(15), np.random.randn(15))),
572        }
573        frame = DataFrame(test_data, dtype=float)
574        assert len(frame) == 20
575        assert frame["A"].dtype == np.object_
576        assert frame["B"].dtype == np.float64
577
578    def test_constructor_dict_dont_upcast(self):
579        d = {"Col1": {"Row1": "A String", "Row2": np.nan}}
580        df = DataFrame(d)
581        assert isinstance(df["Col1"]["Row2"], float)
582
583        dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2])
584        assert isinstance(dm[1][1], int)
585
586    def test_constructor_dict_of_tuples(self):
587        # GH #1491
588        data = {"a": (1, 2, 3), "b": (4, 5, 6)}
589
590        result = DataFrame(data)
591        expected = DataFrame({k: list(v) for k, v in data.items()})
592        tm.assert_frame_equal(result, expected, check_dtype=False)
593
594    def test_constructor_dict_of_ranges(self):
595        # GH 26356
596        data = {"a": range(3), "b": range(3, 6)}
597
598        result = DataFrame(data)
599        expected = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]})
600        tm.assert_frame_equal(result, expected)
601
602    def test_constructor_dict_of_iterators(self):
603        # GH 26349
604        data = {"a": iter(range(3)), "b": reversed(range(3))}
605
606        result = DataFrame(data)
607        expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]})
608        tm.assert_frame_equal(result, expected)
609
610    def test_constructor_dict_of_generators(self):
611        # GH 26349
612        data = {"a": (i for i in (range(3))), "b": (i for i in reversed(range(3)))}
613        result = DataFrame(data)
614        expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]})
615        tm.assert_frame_equal(result, expected)
616
617    def test_constructor_dict_multiindex(self):
618        def check(result, expected):
619            return tm.assert_frame_equal(
620                result,
621                expected,
622                check_dtype=True,
623                check_index_type=True,
624                check_column_type=True,
625                check_names=True,
626            )
627
628        d = {
629            ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2},
630            ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4},
631            ("b", "c"): {("i", "i"): 7, ("i", "j"): 8, ("j", "i"): 9},
632        }
633        _d = sorted(d.items())
634        df = DataFrame(d)
635        expected = DataFrame(
636            [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
637        ).T
638        expected.index = MultiIndex.from_tuples(expected.index)
639        check(df, expected)
640
641        d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111}
642        _d.insert(0, ("z", d["z"]))
643        expected = DataFrame(
644            [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False)
645        ).T
646        expected.index = Index(expected.index, tupleize_cols=False)
647        df = DataFrame(d)
648        df = df.reindex(columns=expected.columns, index=expected.index)
649        check(df, expected)
650
651    def test_constructor_dict_datetime64_index(self):
652        # GH 10160
653        dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"]
654
655        def create_data(constructor):
656            return {i: {constructor(s): 2 * i} for i, s in enumerate(dates_as_str)}
657
658        data_datetime64 = create_data(np.datetime64)
659        data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d"))
660        data_Timestamp = create_data(Timestamp)
661
662        expected = DataFrame(
663            [
664                {0: 0, 1: None, 2: None, 3: None},
665                {0: None, 1: 2, 2: None, 3: None},
666                {0: None, 1: None, 2: 4, 3: None},
667                {0: None, 1: None, 2: None, 3: 6},
668            ],
669            index=[Timestamp(dt) for dt in dates_as_str],
670        )
671
672        result_datetime64 = DataFrame(data_datetime64)
673        result_datetime = DataFrame(data_datetime)
674        result_Timestamp = DataFrame(data_Timestamp)
675        tm.assert_frame_equal(result_datetime64, expected)
676        tm.assert_frame_equal(result_datetime, expected)
677        tm.assert_frame_equal(result_Timestamp, expected)
678
679    def test_constructor_dict_timedelta64_index(self):
680        # GH 10160
681        td_as_int = [1, 2, 3, 4]
682
683        def create_data(constructor):
684            return {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)}
685
686        data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D"))
687        data_timedelta = create_data(lambda x: timedelta(days=x))
688        data_Timedelta = create_data(lambda x: Timedelta(x, "D"))
689
690        expected = DataFrame(
691            [
692                {0: 0, 1: None, 2: None, 3: None},
693                {0: None, 1: 2, 2: None, 3: None},
694                {0: None, 1: None, 2: 4, 3: None},
695                {0: None, 1: None, 2: None, 3: 6},
696            ],
697            index=[Timedelta(td, "D") for td in td_as_int],
698        )
699
700        result_timedelta64 = DataFrame(data_timedelta64)
701        result_timedelta = DataFrame(data_timedelta)
702        result_Timedelta = DataFrame(data_Timedelta)
703        tm.assert_frame_equal(result_timedelta64, expected)
704        tm.assert_frame_equal(result_timedelta, expected)
705        tm.assert_frame_equal(result_Timedelta, expected)
706
707    def test_constructor_period_dict(self):
708        # PeriodIndex
709        a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M")
710        b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D")
711        df = DataFrame({"a": a, "b": b})
712        assert df["a"].dtype == a.dtype
713        assert df["b"].dtype == b.dtype
714
715        # list of periods
716        df = DataFrame({"a": a.astype(object).tolist(), "b": b.astype(object).tolist()})
717        assert df["a"].dtype == a.dtype
718        assert df["b"].dtype == b.dtype
719
720    def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype):
721        ea_scalar, ea_dtype = ea_scalar_and_dtype
722        df = DataFrame({"a": ea_scalar}, index=[0])
723        assert df["a"].dtype == ea_dtype
724
725        expected = DataFrame(index=[0], columns=["a"], data=ea_scalar)
726
727        tm.assert_frame_equal(df, expected)
728
729    @pytest.mark.parametrize(
730        "data,dtype",
731        [
732            (Period("2020-01"), PeriodDtype("M")),
733            (Interval(left=0, right=5), IntervalDtype("int64")),
734            (
735                Timestamp("2011-01-01", tz="US/Eastern"),
736                DatetimeTZDtype(tz="US/Eastern"),
737            ),
738        ],
739    )
740    def test_constructor_extension_scalar_data(self, data, dtype):
741        # GH 34832
742        df = DataFrame(index=[0, 1], columns=["a", "b"], data=data)
743
744        assert df["a"].dtype == dtype
745        assert df["b"].dtype == dtype
746
747        arr = pd.array([data] * 2, dtype=dtype)
748        expected = DataFrame({"a": arr, "b": arr})
749
750        tm.assert_frame_equal(df, expected)
751
752    def test_nested_dict_frame_constructor(self):
753        rng = pd.period_range("1/1/2000", periods=5)
754        df = DataFrame(np.random.randn(10, 5), columns=rng)
755
756        data = {}
757        for col in df.columns:
758            for row in df.index:
759                data.setdefault(col, {})[row] = df._get_value(row, col)
760
761        result = DataFrame(data, columns=rng)
762        tm.assert_frame_equal(result, df)
763
764        data = {}
765        for col in df.columns:
766            for row in df.index:
767                data.setdefault(row, {})[col] = df._get_value(row, col)
768
769        result = DataFrame(data, index=rng).T
770        tm.assert_frame_equal(result, df)
771
772    def _check_basic_constructor(self, empty):
773        # mat: 2d matrix with shape (3, 2) to input. empty - makes sized
774        # objects
775        mat = empty((2, 3), dtype=float)
776        # 2-D input
777        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2])
778
779        assert len(frame.index) == 2
780        assert len(frame.columns) == 3
781
782        # 1-D input
783        frame = DataFrame(empty((3,)), columns=["A"], index=[1, 2, 3])
784        assert len(frame.index) == 3
785        assert len(frame.columns) == 1
786
787        # cast type
788        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64)
789        assert frame.values.dtype == np.int64
790
791        # wrong size axis labels
792        msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
793        with pytest.raises(ValueError, match=msg):
794            DataFrame(mat, columns=["A", "B", "C"], index=[1])
795        msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)"
796        with pytest.raises(ValueError, match=msg):
797            DataFrame(mat, columns=["A", "B"], index=[1, 2])
798
799        # higher dim raise exception
800        with pytest.raises(ValueError, match="Must pass 2-d input"):
801            DataFrame(empty((3, 3, 3)), columns=["A", "B", "C"], index=[1])
802
803        # automatic labeling
804        frame = DataFrame(mat)
805        tm.assert_index_equal(frame.index, Index(range(2)), exact=True)
806        tm.assert_index_equal(frame.columns, Index(range(3)), exact=True)
807
808        frame = DataFrame(mat, index=[1, 2])
809        tm.assert_index_equal(frame.columns, Index(range(3)), exact=True)
810
811        frame = DataFrame(mat, columns=["A", "B", "C"])
812        tm.assert_index_equal(frame.index, Index(range(2)), exact=True)
813
814        # 0-length axis
815        frame = DataFrame(empty((0, 3)))
816        assert len(frame.index) == 0
817
818        frame = DataFrame(empty((3, 0)))
819        assert len(frame.columns) == 0
820
821    def test_constructor_ndarray(self):
822        self._check_basic_constructor(np.ones)
823
824        frame = DataFrame(["foo", "bar"], index=[0, 1], columns=["A"])
825        assert len(frame) == 2
826
827    def test_constructor_maskedarray(self):
828        self._check_basic_constructor(ma.masked_all)
829
830        # Check non-masked values
831        mat = ma.masked_all((2, 3), dtype=float)
832        mat[0, 0] = 1.0
833        mat[1, 2] = 2.0
834        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2])
835        assert 1.0 == frame["A"][1]
836        assert 2.0 == frame["C"][2]
837
838        # what is this even checking??
839        mat = ma.masked_all((2, 3), dtype=float)
840        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2])
841        assert np.all(~np.asarray(frame == frame))
842
843    def test_constructor_maskedarray_nonfloat(self):
844        # masked int promoted to float
845        mat = ma.masked_all((2, 3), dtype=int)
846        # 2-D input
847        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2])
848
849        assert len(frame.index) == 2
850        assert len(frame.columns) == 3
851        assert np.all(~np.asarray(frame == frame))
852
853        # cast type
854        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.float64)
855        assert frame.values.dtype == np.float64
856
857        # Check non-masked values
858        mat2 = ma.copy(mat)
859        mat2[0, 0] = 1
860        mat2[1, 2] = 2
861        frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2])
862        assert 1 == frame["A"][1]
863        assert 2 == frame["C"][2]
864
865        # masked np.datetime64 stays (use NaT as null)
866        mat = ma.masked_all((2, 3), dtype="M8[ns]")
867        # 2-D input
868        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2])
869
870        assert len(frame.index) == 2
871        assert len(frame.columns) == 3
872        assert isna(frame).values.all()
873
874        # cast type
875        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64)
876        assert frame.values.dtype == np.int64
877
878        # Check non-masked values
879        mat2 = ma.copy(mat)
880        mat2[0, 0] = 1
881        mat2[1, 2] = 2
882        frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2])
883        assert 1 == frame["A"].view("i8")[1]
884        assert 2 == frame["C"].view("i8")[2]
885
886        # masked bool promoted to object
887        mat = ma.masked_all((2, 3), dtype=bool)
888        # 2-D input
889        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2])
890
891        assert len(frame.index) == 2
892        assert len(frame.columns) == 3
893        assert np.all(~np.asarray(frame == frame))
894
895        # cast type
896        frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=object)
897        assert frame.values.dtype == object
898
899        # Check non-masked values
900        mat2 = ma.copy(mat)
901        mat2[0, 0] = True
902        mat2[1, 2] = False
903        frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2])
904        assert frame["A"][1] is True
905        assert frame["C"][2] is False
906
907    def test_constructor_maskedarray_hardened(self):
908        # Check numpy masked arrays with hard masks -- from GH24574
909        mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask()
910        result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
911        expected = DataFrame(
912            {"A": [np.nan, np.nan], "B": [np.nan, np.nan]},
913            columns=["A", "B"],
914            index=[1, 2],
915            dtype=float,
916        )
917        tm.assert_frame_equal(result, expected)
918        # Check case where mask is hard but no data are masked
919        mat_hard = ma.ones((2, 2), dtype=float).harden_mask()
920        result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
921        expected = DataFrame(
922            {"A": [1.0, 1.0], "B": [1.0, 1.0]},
923            columns=["A", "B"],
924            index=[1, 2],
925            dtype=float,
926        )
927        tm.assert_frame_equal(result, expected)
928
929    def test_constructor_maskedrecarray_dtype(self):
930        # Ensure constructor honors dtype
931        data = np.ma.array(
932            np.ma.zeros(5, dtype=[("date", "<f8"), ("price", "<f8")]), mask=[False] * 5
933        )
934        data = data.view(mrecords.mrecarray)
935        result = DataFrame(data, dtype=int)
936        expected = DataFrame(np.zeros((5, 2), dtype=int), columns=["date", "price"])
937        tm.assert_frame_equal(result, expected)
938
939    def test_constructor_mrecarray(self):
940        # Ensure mrecarray produces frame identical to dict of masked arrays
941        # from GH3479
942
943        assert_fr_equal = functools.partial(
944            tm.assert_frame_equal, check_index_type=True, check_column_type=True
945        )
946        arrays = [
947            ("float", np.array([1.5, 2.0])),
948            ("int", np.array([1, 2])),
949            ("str", np.array(["abc", "def"])),
950        ]
951        for name, arr in arrays[:]:
952            arrays.append(
953                ("masked1_" + name, np.ma.masked_array(arr, mask=[False, True]))
954            )
955        arrays.append(("masked_all", np.ma.masked_all((2,))))
956        arrays.append(("masked_none", np.ma.masked_array([1.0, 2.5], mask=False)))
957
958        # call assert_frame_equal for all selections of 3 arrays
959        for comb in itertools.combinations(arrays, 3):
960            names, data = zip(*comb)
961            mrecs = mrecords.fromarrays(data, names=names)
962
963            # fill the comb
964            comb = {k: (v.filled() if hasattr(v, "filled") else v) for k, v in comb}
965
966            expected = DataFrame(comb, columns=names)
967            result = DataFrame(mrecs)
968            assert_fr_equal(result, expected)
969
970            # specify columns
971            expected = DataFrame(comb, columns=names[::-1])
972            result = DataFrame(mrecs, columns=names[::-1])
973            assert_fr_equal(result, expected)
974
975            # specify index
976            expected = DataFrame(comb, columns=names, index=[1, 2])
977            result = DataFrame(mrecs, index=[1, 2])
978            assert_fr_equal(result, expected)
979
980    def test_constructor_corner_shape(self):
981        df = DataFrame(index=[])
982        assert df.values.shape == (0, 0)
983
984    @pytest.mark.parametrize(
985        "data, index, columns, dtype, expected",
986        [
987            (None, list(range(10)), ["a", "b"], object, np.object_),
988            (None, None, ["a", "b"], "int64", np.dtype("int64")),
989            (None, list(range(10)), ["a", "b"], int, np.dtype("float64")),
990            ({}, None, ["foo", "bar"], None, np.object_),
991            ({"b": 1}, list(range(10)), list("abc"), int, np.dtype("float64")),
992        ],
993    )
994    def test_constructor_dtype(self, data, index, columns, dtype, expected):
995        df = DataFrame(data, index, columns, dtype)
996        assert df.values.dtype == expected
997
998    @pytest.mark.parametrize(
999        "data,input_dtype,expected_dtype",
1000        (
1001            ([True, False, None], "boolean", pd.BooleanDtype),
1002            ([1.0, 2.0, None], "Float64", pd.Float64Dtype),
1003            ([1, 2, None], "Int64", pd.Int64Dtype),
1004            (["a", "b", "c"], "string", pd.StringDtype),
1005        ),
1006    )
1007    def test_constructor_dtype_nullable_extension_arrays(
1008        self, data, input_dtype, expected_dtype
1009    ):
1010        df = DataFrame({"a": data}, dtype=input_dtype)
1011        assert df["a"].dtype == expected_dtype()
1012
1013    def test_constructor_scalar_inference(self):
1014        data = {"int": 1, "bool": True, "float": 3.0, "complex": 4j, "object": "foo"}
1015        df = DataFrame(data, index=np.arange(10))
1016
1017        assert df["int"].dtype == np.int64
1018        assert df["bool"].dtype == np.bool_
1019        assert df["float"].dtype == np.float64
1020        assert df["complex"].dtype == np.complex128
1021        assert df["object"].dtype == np.object_
1022
1023    def test_constructor_arrays_and_scalars(self):
1024        df = DataFrame({"a": np.random.randn(10), "b": True})
1025        exp = DataFrame({"a": df["a"].values, "b": [True] * 10})
1026
1027        tm.assert_frame_equal(df, exp)
1028        with pytest.raises(ValueError, match="must pass an index"):
1029            DataFrame({"a": False, "b": True})
1030
1031    def test_constructor_DataFrame(self, float_frame):
1032        df = DataFrame(float_frame)
1033        tm.assert_frame_equal(df, float_frame)
1034
1035        df_casted = DataFrame(float_frame, dtype=np.int64)
1036        assert df_casted.values.dtype == np.int64
1037
1038    def test_constructor_more(self, float_frame):
1039        # used to be in test_matrix.py
1040        arr = np.random.randn(10)
1041        dm = DataFrame(arr, columns=["A"], index=np.arange(10))
1042        assert dm.values.ndim == 2
1043
1044        arr = np.random.randn(0)
1045        dm = DataFrame(arr)
1046        assert dm.values.ndim == 2
1047        assert dm.values.ndim == 2
1048
1049        # no data specified
1050        dm = DataFrame(columns=["A", "B"], index=np.arange(10))
1051        assert dm.values.shape == (10, 2)
1052
1053        dm = DataFrame(columns=["A", "B"])
1054        assert dm.values.shape == (0, 2)
1055
1056        dm = DataFrame(index=np.arange(10))
1057        assert dm.values.shape == (10, 0)
1058
1059        # can't cast
1060        mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1)
1061        with pytest.raises(ValueError, match="cast"):
1062            DataFrame(mat, index=[0, 1], columns=[0], dtype=float)
1063
1064        dm = DataFrame(DataFrame(float_frame._series))
1065        tm.assert_frame_equal(dm, float_frame)
1066
1067        # int cast
1068        dm = DataFrame(
1069            {"A": np.ones(10, dtype=int), "B": np.ones(10, dtype=np.float64)},
1070            index=np.arange(10),
1071        )
1072
1073        assert len(dm.columns) == 2
1074        assert dm.values.dtype == np.float64
1075
1076    def test_constructor_empty_list(self):
1077        df = DataFrame([], index=[])
1078        expected = DataFrame(index=[])
1079        tm.assert_frame_equal(df, expected)
1080
1081        # GH 9939
1082        df = DataFrame([], columns=["A", "B"])
1083        expected = DataFrame({}, columns=["A", "B"])
1084        tm.assert_frame_equal(df, expected)
1085
1086        # Empty generator: list(empty_gen()) == []
1087        def empty_gen():
1088            return
1089            yield
1090
1091        df = DataFrame(empty_gen(), columns=["A", "B"])
1092        tm.assert_frame_equal(df, expected)
1093
1094    def test_constructor_list_of_lists(self):
1095        # GH #484
1096        df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"])
1097        assert is_integer_dtype(df["num"])
1098        assert df["str"].dtype == np.object_
1099
1100        # GH 4851
1101        # list of 0-dim ndarrays
1102        expected = DataFrame({0: np.arange(10)})
1103        data = [np.array(x) for x in range(10)]
1104        result = DataFrame(data)
1105        tm.assert_frame_equal(result, expected)
1106
1107    def test_constructor_list_like_data_nested_list_column(self):
1108        # GH 32173
1109        arrays = [list("abcd"), list("cdef")]
1110        result = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
1111
1112        mi = MultiIndex.from_arrays(arrays)
1113        expected = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=mi)
1114
1115        tm.assert_frame_equal(result, expected)
1116
1117    def test_constructor_wrong_length_nested_list_column(self):
1118        # GH 32173
1119        arrays = [list("abc"), list("cde")]
1120
1121        msg = "3 columns passed, passed data had 4"
1122        with pytest.raises(ValueError, match=msg):
1123            DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
1124
1125    def test_constructor_unequal_length_nested_list_column(self):
1126        # GH 32173
1127        arrays = [list("abcd"), list("cde")]
1128
1129        msg = "Length of columns passed for MultiIndex columns is different"
1130        with pytest.raises(ValueError, match=msg):
1131            DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
1132
1133    def test_constructor_sequence_like(self):
1134        # GH 3783
1135        # collections.Squence like
1136
1137        class DummyContainer(abc.Sequence):
1138            def __init__(self, lst):
1139                self._lst = lst
1140
1141            def __getitem__(self, n):
1142                return self._lst.__getitem__(n)
1143
1144            def __len__(self, n):
1145                return self._lst.__len__()
1146
1147        lst_containers = [DummyContainer([1, "a"]), DummyContainer([2, "b"])]
1148        columns = ["num", "str"]
1149        result = DataFrame(lst_containers, columns=columns)
1150        expected = DataFrame([[1, "a"], [2, "b"]], columns=columns)
1151        tm.assert_frame_equal(result, expected, check_dtype=False)
1152
1153        # GH 4297
1154        # support Array
1155        import array
1156
1157        result = DataFrame({"A": array.array("i", range(10))})
1158        expected = DataFrame({"A": list(range(10))})
1159        tm.assert_frame_equal(result, expected, check_dtype=False)
1160
1161        expected = DataFrame([list(range(10)), list(range(10))])
1162        result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
1163        tm.assert_frame_equal(result, expected, check_dtype=False)
1164
1165    def test_constructor_range(self):
1166        # GH26342
1167        result = DataFrame(range(10))
1168        expected = DataFrame(list(range(10)))
1169        tm.assert_frame_equal(result, expected)
1170
1171    def test_constructor_list_of_ranges(self):
1172        result = DataFrame([range(10), range(10)])
1173        expected = DataFrame([list(range(10)), list(range(10))])
1174        tm.assert_frame_equal(result, expected)
1175
1176    def test_constructor_iterable(self):
1177        # GH 21987
1178        class Iter:
1179            def __iter__(self):
1180                for i in range(10):
1181                    yield [1, 2, 3]
1182
1183        expected = DataFrame([[1, 2, 3]] * 10)
1184        result = DataFrame(Iter())
1185        tm.assert_frame_equal(result, expected)
1186
1187    def test_constructor_iterator(self):
1188        result = DataFrame(iter(range(10)))
1189        expected = DataFrame(list(range(10)))
1190        tm.assert_frame_equal(result, expected)
1191
1192    def test_constructor_list_of_iterators(self):
1193        result = DataFrame([iter(range(10)), iter(range(10))])
1194        expected = DataFrame([list(range(10)), list(range(10))])
1195        tm.assert_frame_equal(result, expected)
1196
1197    def test_constructor_generator(self):
1198        # related #2305
1199
1200        gen1 = (i for i in range(10))
1201        gen2 = (i for i in range(10))
1202
1203        expected = DataFrame([list(range(10)), list(range(10))])
1204        result = DataFrame([gen1, gen2])
1205        tm.assert_frame_equal(result, expected)
1206
1207        gen = ([i, "a"] for i in range(10))
1208        result = DataFrame(gen)
1209        expected = DataFrame({0: range(10), 1: "a"})
1210        tm.assert_frame_equal(result, expected, check_dtype=False)
1211
1212    def test_constructor_list_of_odicts(self):
1213        data = [
1214            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
1215            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
1216            OrderedDict([["a", 1.5], ["d", 6]]),
1217            OrderedDict(),
1218            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
1219            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
1220        ]
1221
1222        result = DataFrame(data)
1223        expected = DataFrame.from_dict(
1224            dict(zip(range(len(data)), data)), orient="index"
1225        )
1226        tm.assert_frame_equal(result, expected.reindex(result.index))
1227
1228        result = DataFrame([{}])
1229        expected = DataFrame(index=[0])
1230        tm.assert_frame_equal(result, expected)
1231
1232    def test_constructor_single_row(self):
1233        data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])]
1234
1235        result = DataFrame(data)
1236        expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex(
1237            result.index
1238        )
1239        tm.assert_frame_equal(result, expected)
1240
1241    @pytest.mark.parametrize("dict_type", [dict, OrderedDict])
1242    def test_constructor_ordered_dict_preserve_order(self, dict_type):
1243        # see gh-13304
1244        expected = DataFrame([[2, 1]], columns=["b", "a"])
1245
1246        data = dict_type()
1247        data["b"] = [2]
1248        data["a"] = [1]
1249
1250        result = DataFrame(data)
1251        tm.assert_frame_equal(result, expected)
1252
1253        data = dict_type()
1254        data["b"] = 2
1255        data["a"] = 1
1256
1257        result = DataFrame([data])
1258        tm.assert_frame_equal(result, expected)
1259
1260    @pytest.mark.parametrize("dict_type", [dict, OrderedDict])
1261    def test_constructor_ordered_dict_conflicting_orders(self, dict_type):
1262        # the first dict element sets the ordering for the DataFrame,
1263        # even if there are conflicting orders from subsequent ones
1264        row_one = dict_type()
1265        row_one["b"] = 2
1266        row_one["a"] = 1
1267
1268        row_two = dict_type()
1269        row_two["a"] = 1
1270        row_two["b"] = 2
1271
1272        row_three = {"b": 2, "a": 1}
1273
1274        expected = DataFrame([[2, 1], [2, 1]], columns=["b", "a"])
1275        result = DataFrame([row_one, row_two])
1276        tm.assert_frame_equal(result, expected)
1277
1278        expected = DataFrame([[2, 1], [2, 1], [2, 1]], columns=["b", "a"])
1279        result = DataFrame([row_one, row_two, row_three])
1280        tm.assert_frame_equal(result, expected)
1281
    def test_constructor_list_of_series(self):
        # A list of Series is compared with DataFrame.from_dict(...,
        # orient="index") built from the equivalent dicts. The names
        # ``data``, ``sdict``, ``idx`` and ``data2`` are rebound from section
        # to section, so the sections below must stay in this order.
        data = [
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
        ]
        sdict = OrderedDict(zip(["x", "y"], data))
        idx = Index(["a", "b", "c"])

        # all named: the Series names "x" and "y" are the expected row labels
        data2 = [
            Series([1.5, 3, 4], idx, dtype="O", name="x"),
            Series([1.5, 3, 6], idx, name="y"),
        ]
        result = DataFrame(data2)
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected)

        # some unnamed: the Series without a name is expected under the row
        # label "Unnamed 0"
        data2 = [
            Series([1.5, 3, 4], idx, dtype="O", name="x"),
            Series([1.5, 3, 6], idx),
        ]
        result = DataFrame(data2)

        sdict = OrderedDict(zip(["x", "Unnamed 0"], data))
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected)

        # none named: ragged dicts (including an empty one) are first turned
        # into Series, with object dtype requested for the empty case
        data = [
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
            OrderedDict([["a", 1.5], ["d", 6]]),
            OrderedDict(),
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
        ]
        data = [
            create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data
        ]

        result = DataFrame(data)
        # expected rows are keyed by position 0..5
        sdict = OrderedDict(zip(range(len(data)), data))
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected.reindex(result.index))

        # passing the positional index explicitly must give the same frame
        result2 = DataFrame(data, index=np.arange(6))
        tm.assert_frame_equal(result, result2)

        # a single empty Series gives one row and no columns
        result = DataFrame([Series(dtype=object)])
        expected = DataFrame(index=[0])
        tm.assert_frame_equal(result, expected)

        # two unnamed Series on a shared index: expected rows are keyed 0 and 1
        data = [
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
        ]
        sdict = OrderedDict(zip(range(len(data)), data))

        idx = Index(["a", "b", "c"])
        data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)]
        result = DataFrame(data2)
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected)
1346
1347    def test_constructor_list_of_series_aligned_index(self):
1348        series = [Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3)]
1349        result = DataFrame(series)
1350        expected = DataFrame(
1351            {"b": [0, 1, 2], "a": [0, 1, 2], "c": [0, 1, 2]},
1352            columns=["b", "a", "c"],
1353            index=["0", "1", "2"],
1354        )
1355        tm.assert_frame_equal(result, expected)
1356
1357    def test_constructor_list_of_derived_dicts(self):
1358        class CustomDict(dict):
1359            pass
1360
1361        d = {"a": 1.5, "b": 3}
1362
1363        data_custom = [CustomDict(d)]
1364        data = [d]
1365
1366        result_custom = DataFrame(data_custom)
1367        result = DataFrame(data)
1368        tm.assert_frame_equal(result, result_custom)
1369
1370    def test_constructor_ragged(self):
1371        data = {"A": np.random.randn(10), "B": np.random.randn(8)}
1372        with pytest.raises(ValueError, match="arrays must all be same length"):
1373            DataFrame(data)
1374
1375    def test_constructor_scalar(self):
1376        idx = Index(range(3))
1377        df = DataFrame({"a": 0}, index=idx)
1378        expected = DataFrame({"a": [0, 0, 0]}, index=idx)
1379        tm.assert_frame_equal(df, expected, check_dtype=False)
1380
1381    def test_constructor_Series_copy_bug(self, float_frame):
1382        df = DataFrame(float_frame["A"], index=float_frame.index, columns=["A"])
1383        df.copy()
1384
1385    def test_constructor_mixed_dict_and_Series(self):
1386        data = {}
1387        data["A"] = {"foo": 1, "bar": 2, "baz": 3}
1388        data["B"] = Series([4, 3, 2, 1], index=["bar", "qux", "baz", "foo"])
1389
1390        result = DataFrame(data)
1391        assert result.index.is_monotonic
1392
1393        # ordering ambiguous, raise exception
1394        with pytest.raises(ValueError, match="ambiguous ordering"):
1395            DataFrame({"A": ["a", "b"], "B": {"a": "a", "b": "b"}})
1396
1397        # this is OK though
1398        result = DataFrame({"A": ["a", "b"], "B": Series(["a", "b"], index=["a", "b"])})
1399        expected = DataFrame({"A": ["a", "b"], "B": ["a", "b"]}, index=["a", "b"])
1400        tm.assert_frame_equal(result, expected)
1401
1402    def test_constructor_mixed_type_rows(self):
1403        # Issue 25075
1404        data = [[1, 2], (3, 4)]
1405        result = DataFrame(data)
1406        expected = DataFrame([[1, 2], [3, 4]])
1407        tm.assert_frame_equal(result, expected)
1408
1409    @pytest.mark.parametrize(
1410        "tuples,lists",
1411        [
1412            ((), []),
1413            ((()), []),
1414            (((), ()), [(), ()]),
1415            (((), ()), [[], []]),
1416            (([], []), [[], []]),
1417            (([1], [2]), [[1], [2]]),  # GH 32776
1418            (([1, 2, 3], [4, 5, 6]), [[1, 2, 3], [4, 5, 6]]),
1419        ],
1420    )
1421    def test_constructor_tuple(self, tuples, lists):
1422        # GH 25691
1423        result = DataFrame(tuples)
1424        expected = DataFrame(lists)
1425        tm.assert_frame_equal(result, expected)
1426
1427    def test_constructor_list_of_tuples(self):
1428        result = DataFrame({"A": [(1, 2), (3, 4)]})
1429        expected = DataFrame({"A": Series([(1, 2), (3, 4)])})
1430        tm.assert_frame_equal(result, expected)
1431
1432    def test_constructor_list_of_namedtuples(self):
1433        # GH11181
1434        from collections import namedtuple
1435
1436        named_tuple = namedtuple("Pandas", list("ab"))
1437        tuples = [named_tuple(1, 3), named_tuple(2, 4)]
1438        expected = DataFrame({"a": [1, 2], "b": [3, 4]})
1439        result = DataFrame(tuples)
1440        tm.assert_frame_equal(result, expected)
1441
1442        # with columns
1443        expected = DataFrame({"y": [1, 2], "z": [3, 4]})
1444        result = DataFrame(tuples, columns=["y", "z"])
1445        tm.assert_frame_equal(result, expected)
1446
1447    def test_constructor_list_of_dataclasses(self):
1448        # GH21910
1449        from dataclasses import make_dataclass
1450
1451        Point = make_dataclass("Point", [("x", int), ("y", int)])
1452
1453        datas = [Point(0, 3), Point(1, 3)]
1454        expected = DataFrame({"x": [0, 1], "y": [3, 3]})
1455        result = DataFrame(datas)
1456        tm.assert_frame_equal(result, expected)
1457
1458    def test_constructor_list_of_dataclasses_with_varying_types(self):
1459        # GH21910
1460        from dataclasses import make_dataclass
1461
1462        # varying types
1463        Point = make_dataclass("Point", [("x", int), ("y", int)])
1464        HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)])
1465
1466        datas = [Point(0, 3), HLine(1, 3, 3)]
1467
1468        expected = DataFrame(
1469            {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]}
1470        )
1471        result = DataFrame(datas)
1472        tm.assert_frame_equal(result, expected)
1473
1474    def test_constructor_list_of_dataclasses_error_thrown(self):
1475        # GH21910
1476        from dataclasses import make_dataclass
1477
1478        Point = make_dataclass("Point", [("x", int), ("y", int)])
1479
1480        # expect TypeError
1481        msg = "asdict() should be called on dataclass instances"
1482        with pytest.raises(TypeError, match=re.escape(msg)):
1483            DataFrame([Point(0, 0), {"x": 1, "y": 0}])
1484
1485    def test_constructor_list_of_dict_order(self):
1486        # GH10056
1487        data = [
1488            {"First": 1, "Second": 4, "Third": 7, "Fourth": 10},
1489            {"Second": 5, "First": 2, "Fourth": 11, "Third": 8},
1490            {"Second": 6, "First": 3, "Fourth": 12, "Third": 9, "YYY": 14, "XXX": 13},
1491        ]
1492        expected = DataFrame(
1493            {
1494                "First": [1, 2, 3],
1495                "Second": [4, 5, 6],
1496                "Third": [7, 8, 9],
1497                "Fourth": [10, 11, 12],
1498                "YYY": [None, None, 14],
1499                "XXX": [None, None, 13],
1500            }
1501        )
1502        result = DataFrame(data)
1503        tm.assert_frame_equal(result, expected)
1504
1505    def test_constructor_orient(self, float_string_frame):
1506        data_dict = float_string_frame.T._series
1507        recons = DataFrame.from_dict(data_dict, orient="index")
1508        expected = float_string_frame.reindex(index=recons.index)
1509        tm.assert_frame_equal(recons, expected)
1510
1511        # dict of sequence
1512        a = {"hi": [32, 3, 3], "there": [3, 5, 3]}
1513        rs = DataFrame.from_dict(a, orient="index")
1514        xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
1515        tm.assert_frame_equal(rs, xp)
1516
1517    def test_constructor_from_ordered_dict(self):
1518        # GH8425
1519        a = OrderedDict(
1520            [
1521                ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])),
1522                ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])),
1523                ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])),
1524            ]
1525        )
1526        expected = DataFrame.from_dict(a, orient="columns").T
1527        result = DataFrame.from_dict(a, orient="index")
1528        tm.assert_frame_equal(result, expected)
1529
1530    def test_from_dict_columns_parameter(self):
1531        # GH 18529
1532        # Test new columns parameter for from_dict that was added to make
1533        # from_items(..., orient='index', columns=[...]) easier to replicate
1534        result = DataFrame.from_dict(
1535            OrderedDict([("A", [1, 2]), ("B", [4, 5])]),
1536            orient="index",
1537            columns=["one", "two"],
1538        )
1539        expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"])
1540        tm.assert_frame_equal(result, expected)
1541
1542        msg = "cannot use columns parameter with orient='columns'"
1543        with pytest.raises(ValueError, match=msg):
1544            DataFrame.from_dict(
1545                {"A": [1, 2], "B": [4, 5]},
1546                orient="columns",
1547                columns=["one", "two"],
1548            )
1549        with pytest.raises(ValueError, match=msg):
1550            DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])
1551
1552    @pytest.mark.parametrize(
1553        "data_dict, keys, orient",
1554        [
1555            ({}, [], "index"),
1556            ([{("a",): 1}, {("a",): 2}], [("a",)], "columns"),
1557            ([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"),
1558            ([{("a", "b"): 1}], [("a", "b")], "columns"),
1559        ],
1560    )
1561    def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
1562        # GH 16769
1563        df = DataFrame.from_dict(data_dict, orient)
1564
1565        result = df.columns
1566        expected = Index(keys, dtype="object", tupleize_cols=False)
1567
1568        tm.assert_index_equal(result, expected)
1569
1570    def test_frame_dict_constructor_empty_series(self):
1571        s1 = Series(
1572            [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])
1573        )
1574        s2 = Series(
1575            [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])
1576        )
1577        s3 = Series(dtype=object)
1578
1579        # it works!
1580        DataFrame({"foo": s1, "bar": s2, "baz": s3})
1581        DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2})
1582
1583    def test_constructor_Series_named(self):
1584        a = Series([1, 2, 3], index=["a", "b", "c"], name="x")
1585        df = DataFrame(a)
1586        assert df.columns[0] == "x"
1587        tm.assert_index_equal(df.index, a.index)
1588
1589        # ndarray like
1590        arr = np.random.randn(10)
1591        s = Series(arr, name="x")
1592        df = DataFrame(s)
1593        expected = DataFrame({"x": s})
1594        tm.assert_frame_equal(df, expected)
1595
1596        s = Series(arr, index=range(3, 13))
1597        df = DataFrame(s)
1598        expected = DataFrame({0: s})
1599        tm.assert_frame_equal(df, expected)
1600
1601        msg = r"Shape of passed values is \(10, 1\), indices imply \(10, 2\)"
1602        with pytest.raises(ValueError, match=msg):
1603            DataFrame(s, columns=[1, 2])
1604
1605        # #2234
1606        a = Series([], name="x", dtype=object)
1607        df = DataFrame(a)
1608        assert df.columns[0] == "x"
1609
1610        # series with name and w/o
1611        s1 = Series(arr, name="x")
1612        df = DataFrame([s1, arr]).T
1613        expected = DataFrame({"x": s1, "Unnamed 0": arr}, columns=["x", "Unnamed 0"])
1614        tm.assert_frame_equal(df, expected)
1615
1616        # this is a bit non-intuitive here; the series collapse down to arrays
1617        df = DataFrame([arr, s1]).T
1618        expected = DataFrame({1: s1, 0: arr}, columns=[0, 1])
1619        tm.assert_frame_equal(df, expected)
1620
1621    def test_constructor_Series_named_and_columns(self):
1622        # GH 9232 validation
1623
1624        s0 = Series(range(5), name=0)
1625        s1 = Series(range(5), name=1)
1626
1627        # matching name and column gives standard frame
1628        tm.assert_frame_equal(DataFrame(s0, columns=[0]), s0.to_frame())
1629        tm.assert_frame_equal(DataFrame(s1, columns=[1]), s1.to_frame())
1630
1631        # non-matching produces empty frame
1632        assert DataFrame(s0, columns=[1]).empty
1633        assert DataFrame(s1, columns=[0]).empty
1634
1635    def test_constructor_Series_differently_indexed(self):
1636        # name
1637        s1 = Series([1, 2, 3], index=["a", "b", "c"], name="x")
1638
1639        # no name
1640        s2 = Series([1, 2, 3], index=["a", "b", "c"])
1641
1642        other_index = Index(["a", "b"])
1643
1644        df1 = DataFrame(s1, index=other_index)
1645        exp1 = DataFrame(s1.reindex(other_index))
1646        assert df1.columns[0] == "x"
1647        tm.assert_frame_equal(df1, exp1)
1648
1649        df2 = DataFrame(s2, index=other_index)
1650        exp2 = DataFrame(s2.reindex(other_index))
1651        assert df2.columns[0] == 0
1652        tm.assert_index_equal(df2.index, other_index)
1653        tm.assert_frame_equal(df2, exp2)
1654
1655    @pytest.mark.parametrize(
1656        "name_in1,name_in2,name_in3,name_out",
1657        [
1658            ("idx", "idx", "idx", "idx"),
1659            ("idx", "idx", None, None),
1660            ("idx", None, None, None),
1661            ("idx1", "idx2", None, None),
1662            ("idx1", "idx1", "idx2", None),
1663            ("idx1", "idx2", "idx3", None),
1664            (None, None, None, None),
1665        ],
1666    )
1667    def test_constructor_index_names(self, name_in1, name_in2, name_in3, name_out):
1668        # GH13475
1669        indices = [
1670            Index(["a", "b", "c"], name=name_in1),
1671            Index(["b", "c", "d"], name=name_in2),
1672            Index(["c", "d", "e"], name=name_in3),
1673        ]
1674        series = {
1675            c: Series([0, 1, 2], index=i) for i, c in zip(indices, ["x", "y", "z"])
1676        }
1677        result = DataFrame(series)
1678
1679        exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out)
1680        expected = DataFrame(
1681            {
1682                "x": [0, 1, 2, np.nan, np.nan],
1683                "y": [np.nan, 0, 1, 2, np.nan],
1684                "z": [np.nan, np.nan, 0, 1, 2],
1685            },
1686            index=exp_ind,
1687        )
1688
1689        tm.assert_frame_equal(result, expected)
1690
1691    def test_constructor_manager_resize(self, float_frame):
1692        index = list(float_frame.index[:5])
1693        columns = list(float_frame.columns[:3])
1694
1695        result = DataFrame(float_frame._mgr, index=index, columns=columns)
1696        tm.assert_index_equal(result.index, Index(index))
1697        tm.assert_index_equal(result.columns, Index(columns))
1698
1699    def test_constructor_mix_series_nonseries(self, float_frame):
1700        df = DataFrame(
1701            {"A": float_frame["A"], "B": list(float_frame["B"])}, columns=["A", "B"]
1702        )
1703        tm.assert_frame_equal(df, float_frame.loc[:, ["A", "B"]])
1704
1705        msg = "does not match index length"
1706        with pytest.raises(ValueError, match=msg):
1707            DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]})
1708
1709    def test_constructor_miscast_na_int_dtype(self):
1710        df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
1711        expected = DataFrame([[np.nan, 1], [1, 0]])
1712        tm.assert_frame_equal(df, expected)
1713
1714    def test_constructor_column_duplicates(self):
1715        # it works! #2079
1716        df = DataFrame([[8, 5]], columns=["a", "a"])
1717        edf = DataFrame([[8, 5]])
1718        edf.columns = ["a", "a"]
1719
1720        tm.assert_frame_equal(df, edf)
1721
1722        idf = DataFrame.from_records([(8, 5)], columns=["a", "a"])
1723
1724        tm.assert_frame_equal(idf, edf)
1725
1726        msg = "If using all scalar values, you must pass an index"
1727        with pytest.raises(ValueError, match=msg):
1728            DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)]))
1729
1730    def test_constructor_empty_with_string_dtype(self):
1731        # GH 9428
1732        expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object)
1733
1734        df = DataFrame(index=[0, 1], columns=[0, 1], dtype=str)
1735        tm.assert_frame_equal(df, expected)
1736        df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.str_)
1737        tm.assert_frame_equal(df, expected)
1738        df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.unicode_)
1739        tm.assert_frame_equal(df, expected)
1740        df = DataFrame(index=[0, 1], columns=[0, 1], dtype="U5")
1741        tm.assert_frame_equal(df, expected)
1742
1743    def test_constructor_empty_with_string_extension(self):
1744        # GH 34915
1745        expected = DataFrame(index=[], columns=["c1"], dtype="string")
1746        df = DataFrame(columns=["c1"], dtype="string")
1747        tm.assert_frame_equal(df, expected)
1748
1749    def test_constructor_single_value(self):
1750        # expecting single value upcasting here
1751        df = DataFrame(0.0, index=[1, 2, 3], columns=["a", "b", "c"])
1752        tm.assert_frame_equal(
1753            df, DataFrame(np.zeros(df.shape).astype("float64"), df.index, df.columns)
1754        )
1755
1756        df = DataFrame(0, index=[1, 2, 3], columns=["a", "b", "c"])
1757        tm.assert_frame_equal(
1758            df, DataFrame(np.zeros(df.shape).astype("int64"), df.index, df.columns)
1759        )
1760
1761        df = DataFrame("a", index=[1, 2], columns=["a", "c"])
1762        tm.assert_frame_equal(
1763            df,
1764            DataFrame(
1765                np.array([["a", "a"], ["a", "a"]], dtype=object),
1766                index=[1, 2],
1767                columns=["a", "c"],
1768            ),
1769        )
1770
1771        msg = "DataFrame constructor not properly called!"
1772        with pytest.raises(ValueError, match=msg):
1773            DataFrame("a", [1, 2])
1774        with pytest.raises(ValueError, match=msg):
1775            DataFrame("a", columns=["a", "c"])
1776
1777        msg = "incompatible data and dtype"
1778        with pytest.raises(TypeError, match=msg):
1779            DataFrame("a", [1, 2], ["a", "c"], float)
1780
1781    def test_constructor_with_datetimes(self):
1782        intname = np.dtype(np.int_).name
1783        floatname = np.dtype(np.float_).name
1784        datetime64name = np.dtype("M8[ns]").name
1785        objectname = np.dtype(np.object_).name
1786
1787        # single item
1788        df = DataFrame(
1789            {
1790                "A": 1,
1791                "B": "foo",
1792                "C": "bar",
1793                "D": Timestamp("20010101"),
1794                "E": datetime(2001, 1, 2, 0, 0),
1795            },
1796            index=np.arange(10),
1797        )
1798        result = df.dtypes
1799        expected = Series(
1800            [np.dtype("int64")]
1801            + [np.dtype(objectname)] * 2
1802            + [np.dtype(datetime64name)] * 2,
1803            index=list("ABCDE"),
1804        )
1805        tm.assert_series_equal(result, expected)
1806
1807        # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0
1808        # ndarray with a dtype specified)
1809        df = DataFrame(
1810            {
1811                "a": 1.0,
1812                "b": 2,
1813                "c": "foo",
1814                floatname: np.array(1.0, dtype=floatname),
1815                intname: np.array(1, dtype=intname),
1816            },
1817            index=np.arange(10),
1818        )
1819        result = df.dtypes
1820        expected = Series(
1821            [np.dtype("float64")]
1822            + [np.dtype("int64")]
1823            + [np.dtype("object")]
1824            + [np.dtype("float64")]
1825            + [np.dtype(intname)],
1826            index=["a", "b", "c", floatname, intname],
1827        )
1828        tm.assert_series_equal(result, expected)
1829
1830        # check with ndarray construction ndim>0
1831        df = DataFrame(
1832            {
1833                "a": 1.0,
1834                "b": 2,
1835                "c": "foo",
1836                floatname: np.array([1.0] * 10, dtype=floatname),
1837                intname: np.array([1] * 10, dtype=intname),
1838            },
1839            index=np.arange(10),
1840        )
1841        result = df.dtypes
1842        expected = Series(
1843            [np.dtype("float64")]
1844            + [np.dtype("int64")]
1845            + [np.dtype("object")]
1846            + [np.dtype("float64")]
1847            + [np.dtype(intname)],
1848            index=["a", "b", "c", floatname, intname],
1849        )
1850        tm.assert_series_equal(result, expected)
1851
1852        # GH 2809
1853        ind = date_range(start="2000-01-01", freq="D", periods=10)
1854        datetimes = [ts.to_pydatetime() for ts in ind]
1855        datetime_s = Series(datetimes)
1856        assert datetime_s.dtype == "M8[ns]"
1857
1858        # GH 2810
1859        ind = date_range(start="2000-01-01", freq="D", periods=10)
1860        datetimes = [ts.to_pydatetime() for ts in ind]
1861        dates = [ts.date() for ts in ind]
1862        df = DataFrame(datetimes, columns=["datetimes"])
1863        df["dates"] = dates
1864        result = df.dtypes
1865        expected = Series(
1866            [np.dtype("datetime64[ns]"), np.dtype("object")],
1867            index=["datetimes", "dates"],
1868        )
1869        tm.assert_series_equal(result, expected)
1870
1871        # GH 7594
1872        # don't coerce tz-aware
1873        import pytz
1874
1875        tz = pytz.timezone("US/Eastern")
1876        dt = tz.localize(datetime(2012, 1, 1))
1877
1878        df = DataFrame({"End Date": dt}, index=[0])
1879        assert df.iat[0, 0] == dt
1880        tm.assert_series_equal(
1881            df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"})
1882        )
1883
1884        df = DataFrame([{"End Date": dt}])
1885        assert df.iat[0, 0] == dt
1886        tm.assert_series_equal(
1887            df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"})
1888        )
1889
1890        # tz-aware (UTC and other tz's)
1891        # GH 8411
1892        dr = date_range("20130101", periods=3)
1893        df = DataFrame({"value": dr})
1894        assert df.iat[0, 0].tz is None
1895        dr = date_range("20130101", periods=3, tz="UTC")
1896        df = DataFrame({"value": dr})
1897        assert str(df.iat[0, 0].tz) == "UTC"
1898        dr = date_range("20130101", periods=3, tz="US/Eastern")
1899        df = DataFrame({"value": dr})
1900        assert str(df.iat[0, 0].tz) == "US/Eastern"
1901
1902        # GH 7822
1903        # preserver an index with a tz on dict construction
1904        i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern")
1905
1906        expected = DataFrame({"a": i.to_series().reset_index(drop=True)})
1907        df = DataFrame()
1908        df["a"] = i
1909        tm.assert_frame_equal(df, expected)
1910
1911        df = DataFrame({"a": i})
1912        tm.assert_frame_equal(df, expected)
1913
1914        # multiples
1915        i_no_tz = date_range("1/1/2011", periods=5, freq="10s")
1916        df = DataFrame({"a": i, "b": i_no_tz})
1917        expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz})
1918        tm.assert_frame_equal(df, expected)
1919
1920    @pytest.mark.parametrize(
1921        "arr",
1922        [
1923            np.array([None, None, None, None, datetime.now(), None]),
1924            np.array([None, None, datetime.now(), None]),
1925            [[np.datetime64("NaT")], [None]],
1926            [[np.datetime64("NaT")], [pd.NaT]],
1927            [[None], [np.datetime64("NaT")]],
1928            [[None], [pd.NaT]],
1929            [[pd.NaT], [np.datetime64("NaT")]],
1930            [[pd.NaT], [None]],
1931        ],
1932    )
1933    def test_constructor_datetimes_with_nulls(self, arr):
1934        # gh-15869, GH#11220
1935        result = DataFrame(arr).dtypes
1936        expected = Series([np.dtype("datetime64[ns]")])
1937        tm.assert_series_equal(result, expected)
1938
1939    @pytest.mark.parametrize("order", ["K", "A", "C", "F"])
1940    @pytest.mark.parametrize(
1941        "dtype",
1942        [
1943            "datetime64[M]",
1944            "datetime64[D]",
1945            "datetime64[h]",
1946            "datetime64[m]",
1947            "datetime64[s]",
1948            "datetime64[ms]",
1949            "datetime64[us]",
1950            "datetime64[ns]",
1951        ],
1952    )
1953    def test_constructor_datetimes_non_ns(self, order, dtype):
1954        na = np.array(
1955            [
1956                ["2015-01-01", "2015-01-02", "2015-01-03"],
1957                ["2017-01-01", "2017-01-02", "2017-02-03"],
1958            ],
1959            dtype=dtype,
1960            order=order,
1961        )
1962        df = DataFrame(na)
1963        expected = DataFrame(
1964            [
1965                ["2015-01-01", "2015-01-02", "2015-01-03"],
1966                ["2017-01-01", "2017-01-02", "2017-02-03"],
1967            ]
1968        )
1969        expected = expected.astype(dtype=dtype)
1970        tm.assert_frame_equal(df, expected)
1971
1972    @pytest.mark.parametrize("order", ["K", "A", "C", "F"])
1973    @pytest.mark.parametrize(
1974        "dtype",
1975        [
1976            "timedelta64[D]",
1977            "timedelta64[h]",
1978            "timedelta64[m]",
1979            "timedelta64[s]",
1980            "timedelta64[ms]",
1981            "timedelta64[us]",
1982            "timedelta64[ns]",
1983        ],
1984    )
1985    def test_constructor_timedelta_non_ns(self, order, dtype):
1986        na = np.array(
1987            [
1988                [np.timedelta64(1, "D"), np.timedelta64(2, "D")],
1989                [np.timedelta64(4, "D"), np.timedelta64(5, "D")],
1990            ],
1991            dtype=dtype,
1992            order=order,
1993        )
1994        df = DataFrame(na).astype("timedelta64[ns]")
1995        expected = DataFrame(
1996            [
1997                [Timedelta(1, "D"), Timedelta(2, "D")],
1998                [Timedelta(4, "D"), Timedelta(5, "D")],
1999            ],
2000        )
2001        tm.assert_frame_equal(df, expected)
2002
2003    def test_constructor_for_list_with_dtypes(self):
2004        # test list of lists/ndarrays
2005        df = DataFrame([np.arange(5) for x in range(5)])
2006        result = df.dtypes
2007        expected = Series([np.dtype("int64")] * 5)
2008        tm.assert_series_equal(result, expected)
2009
2010        df = DataFrame([np.array(np.arange(5), dtype="int32") for x in range(5)])
2011        result = df.dtypes
2012        expected = Series([np.dtype("int64")] * 5)
2013        tm.assert_series_equal(result, expected)
2014
2015        # overflow issue? (we always expecte int64 upcasting here)
2016        df = DataFrame({"a": [2 ** 31, 2 ** 31 + 1]})
2017        assert df.dtypes.iloc[0] == np.dtype("int64")
2018
2019        # GH #2751 (construction with no index specified), make sure we cast to
2020        # platform values
2021        df = DataFrame([1, 2])
2022        assert df.dtypes.iloc[0] == np.dtype("int64")
2023
2024        df = DataFrame([1.0, 2.0])
2025        assert df.dtypes.iloc[0] == np.dtype("float64")
2026
2027        df = DataFrame({"a": [1, 2]})
2028        assert df.dtypes.iloc[0] == np.dtype("int64")
2029
2030        df = DataFrame({"a": [1.0, 2.0]})
2031        assert df.dtypes.iloc[0] == np.dtype("float64")
2032
2033        df = DataFrame({"a": 1}, index=range(3))
2034        assert df.dtypes.iloc[0] == np.dtype("int64")
2035
2036        df = DataFrame({"a": 1.0}, index=range(3))
2037        assert df.dtypes.iloc[0] == np.dtype("float64")
2038
2039        # with object list
2040        df = DataFrame(
2041            {
2042                "a": [1, 2, 4, 7],
2043                "b": [1.2, 2.3, 5.1, 6.3],
2044                "c": list("abcd"),
2045                "d": [datetime(2000, 1, 1) for i in range(4)],
2046                "e": [1.0, 2, 4.0, 7],
2047            }
2048        )
2049        result = df.dtypes
2050        expected = Series(
2051            [
2052                np.dtype("int64"),
2053                np.dtype("float64"),
2054                np.dtype("object"),
2055                np.dtype("datetime64[ns]"),
2056                np.dtype("float64"),
2057            ],
2058            index=list("abcde"),
2059        )
2060        tm.assert_series_equal(result, expected)
2061
2062    def test_constructor_frame_copy(self, float_frame):
2063        cop = DataFrame(float_frame, copy=True)
2064        cop["A"] = 5
2065        assert (cop["A"] == 5).all()
2066        assert not (float_frame["A"] == 5).all()
2067
2068    def test_constructor_ndarray_copy(self, float_frame):
2069        df = DataFrame(float_frame.values)
2070
2071        float_frame.values[5] = 5
2072        assert (df.values[5] == 5).all()
2073
2074        df = DataFrame(float_frame.values, copy=True)
2075        float_frame.values[6] = 6
2076        assert not (df.values[6] == 6).all()
2077
2078    def test_constructor_series_copy(self, float_frame):
2079        series = float_frame._series
2080
2081        df = DataFrame({"A": series["A"]})
2082        df["A"][:] = 5
2083
2084        assert not (series["A"] == 5).all()
2085
2086    def test_constructor_with_nas(self):
2087        # GH 5016
2088        # na's in indices
2089
2090        def check(df):
2091            for i in range(len(df.columns)):
2092                df.iloc[:, i]
2093
2094            indexer = np.arange(len(df.columns))[isna(df.columns)]
2095
2096            # No NaN found -> error
2097            if len(indexer) == 0:
2098                with pytest.raises(KeyError, match="^nan$"):
2099                    df.loc[:, np.nan]
2100            # single nan should result in Series
2101            elif len(indexer) == 1:
2102                tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan])
2103            # multiple nans should result in DataFrame
2104            else:
2105                tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan])
2106
2107        df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan])
2108        check(df)
2109
2110        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan])
2111        check(df)
2112
2113        df = DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan])
2114        check(df)
2115
2116        df = DataFrame(
2117            [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]
2118        )
2119        check(df)
2120
2121        # GH 21428 (non-unique columns)
2122        df = DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2])
2123        check(df)
2124
2125    def test_constructor_lists_to_object_dtype(self):
2126        # from #1074
2127        d = DataFrame({"a": [np.nan, False]})
2128        assert d["a"].dtype == np.object_
2129        assert not d["a"][1]
2130
2131    def test_constructor_categorical(self):
2132
2133        # GH8626
2134
2135        # dict creation
2136        df = DataFrame({"A": list("abc")}, dtype="category")
2137        expected = Series(list("abc"), dtype="category", name="A")
2138        tm.assert_series_equal(df["A"], expected)
2139
2140        # to_frame
2141        s = Series(list("abc"), dtype="category")
2142        result = s.to_frame()
2143        expected = Series(list("abc"), dtype="category", name=0)
2144        tm.assert_series_equal(result[0], expected)
2145        result = s.to_frame(name="foo")
2146        expected = Series(list("abc"), dtype="category", name="foo")
2147        tm.assert_series_equal(result["foo"], expected)
2148
2149        # list-like creation
2150        df = DataFrame(list("abc"), dtype="category")
2151        expected = Series(list("abc"), dtype="category", name=0)
2152        tm.assert_series_equal(df[0], expected)
2153
2154        # ndim != 1
2155        df = DataFrame([Categorical(list("abc"))])
2156        expected = DataFrame({0: Series(list("abc"), dtype="category")})
2157        tm.assert_frame_equal(df, expected)
2158
2159        df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
2160        expected = DataFrame(
2161            {
2162                0: Series(list("abc"), dtype="category"),
2163                1: Series(list("abd"), dtype="category"),
2164            },
2165            columns=[0, 1],
2166        )
2167        tm.assert_frame_equal(df, expected)
2168
2169        # mixed
2170        df = DataFrame([Categorical(list("abc")), list("def")])
2171        expected = DataFrame(
2172            {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1]
2173        )
2174        tm.assert_frame_equal(df, expected)
2175
2176        # invalid (shape)
2177        msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)"
2178        with pytest.raises(ValueError, match=msg):
2179            DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
2180
2181        # ndim > 1
2182        msg = "> 1 ndim Categorical are not supported at this time"
2183        with pytest.raises(NotImplementedError, match=msg):
2184            Categorical(np.array([list("abcd")]))
2185
2186    def test_constructor_categorical_series(self):
2187
2188        items = [1, 2, 3, 1]
2189        exp = Series(items).astype("category")
2190        res = Series(items, dtype="category")
2191        tm.assert_series_equal(res, exp)
2192
2193        items = ["a", "b", "c", "a"]
2194        exp = Series(items).astype("category")
2195        res = Series(items, dtype="category")
2196        tm.assert_series_equal(res, exp)
2197
2198        # insert into frame with different index
2199        # GH 8076
2200        index = date_range("20000101", periods=3)
2201        expected = Series(
2202            Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
2203        )
2204        expected.index = index
2205
2206        expected = DataFrame({"x": expected})
2207        df = DataFrame({"x": Series(["a", "b", "c"], dtype="category")}, index=index)
2208        tm.assert_frame_equal(df, expected)
2209
2210    def test_from_records_to_records(self):
2211        # from numpy documentation
2212        arr = np.zeros((2,), dtype=("i4,f4,a10"))
2213        arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
2214
2215        # TODO(wesm): unused
2216        frame = DataFrame.from_records(arr)  # noqa
2217
2218        index = Index(np.arange(len(arr))[::-1])
2219        indexed_frame = DataFrame.from_records(arr, index=index)
2220        tm.assert_index_equal(indexed_frame.index, index)
2221
2222        # without names, it should go to last ditch
2223        arr2 = np.zeros((2, 3))
2224        tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))
2225
2226        # wrong length
2227        msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
2228        with pytest.raises(ValueError, match=msg):
2229            DataFrame.from_records(arr, index=index[:-1])
2230
2231        indexed_frame = DataFrame.from_records(arr, index="f1")
2232
2233        # what to do?
2234        records = indexed_frame.to_records()
2235        assert len(records.dtype.names) == 3
2236
2237        records = indexed_frame.to_records(index=False)
2238        assert len(records.dtype.names) == 2
2239        assert "index" not in records.dtype.names
2240
2241    def test_from_records_nones(self):
2242        tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)]
2243
2244        df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"])
2245        assert np.isnan(df["c"][0])
2246
2247    def test_from_records_iterator(self):
2248        arr = np.array(
2249            [(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)],
2250            dtype=[
2251                ("x", np.float64),
2252                ("u", np.float32),
2253                ("y", np.int64),
2254                ("z", np.int32),
2255            ],
2256        )
2257        df = DataFrame.from_records(iter(arr), nrows=2)
2258        xp = DataFrame(
2259            {
2260                "x": np.array([1.0, 3.0], dtype=np.float64),
2261                "u": np.array([1.0, 3.0], dtype=np.float32),
2262                "y": np.array([2, 4], dtype=np.int64),
2263                "z": np.array([2, 4], dtype=np.int32),
2264            }
2265        )
2266        tm.assert_frame_equal(df.reindex_like(xp), xp)
2267
2268        # no dtypes specified here, so just compare with the default
2269        arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)]
2270        df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2)
2271        tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False)
2272
2273    def test_from_records_tuples_generator(self):
2274        def tuple_generator(length):
2275            for i in range(length):
2276                letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2277                yield (i, letters[i % len(letters)], i / length)
2278
2279        columns_names = ["Integer", "String", "Float"]
2280        columns = [
2281            [i[j] for i in tuple_generator(10)] for j in range(len(columns_names))
2282        ]
2283        data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
2284        expected = DataFrame(data, columns=columns_names)
2285
2286        generator = tuple_generator(10)
2287        result = DataFrame.from_records(generator, columns=columns_names)
2288        tm.assert_frame_equal(result, expected)
2289
2290    def test_from_records_lists_generator(self):
2291        def list_generator(length):
2292            for i in range(length):
2293                letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2294                yield [i, letters[i % len(letters)], i / length]
2295
2296        columns_names = ["Integer", "String", "Float"]
2297        columns = [
2298            [i[j] for i in list_generator(10)] for j in range(len(columns_names))
2299        ]
2300        data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
2301        expected = DataFrame(data, columns=columns_names)
2302
2303        generator = list_generator(10)
2304        result = DataFrame.from_records(generator, columns=columns_names)
2305        tm.assert_frame_equal(result, expected)
2306
2307    def test_from_records_columns_not_modified(self):
2308        tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)]
2309
2310        columns = ["a", "b", "c"]
2311        original_columns = list(columns)
2312
2313        df = DataFrame.from_records(tuples, columns=columns, index="a")  # noqa
2314
2315        assert columns == original_columns
2316
2317    def test_from_records_decimal(self):
2318        from decimal import Decimal
2319
2320        tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)]
2321
2322        df = DataFrame.from_records(tuples, columns=["a"])
2323        assert df["a"].dtype == object
2324
2325        df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True)
2326        assert df["a"].dtype == np.float64
2327        assert np.isnan(df["a"].values[-1])
2328
2329    def test_from_records_duplicates(self):
2330        result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
2331
2332        expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
2333
2334        tm.assert_frame_equal(result, expected)
2335
2336    def test_from_records_set_index_name(self):
2337        def create_dict(order_id):
2338            return {
2339                "order_id": order_id,
2340                "quantity": np.random.randint(1, 10),
2341                "price": np.random.randint(1, 10),
2342            }
2343
2344        documents = [create_dict(i) for i in range(10)]
2345        # demo missing data
2346        documents.append({"order_id": 10, "quantity": 5})
2347
2348        result = DataFrame.from_records(documents, index="order_id")
2349        assert result.index.name == "order_id"
2350
2351        # MultiIndex
2352        result = DataFrame.from_records(documents, index=["order_id", "quantity"])
2353        assert result.index.names == ("order_id", "quantity")
2354
2355    def test_from_records_misc_brokenness(self):
2356        # #2179
2357
2358        data = {1: ["foo"], 2: ["bar"]}
2359
2360        result = DataFrame.from_records(data, columns=["a", "b"])
2361        exp = DataFrame(data, columns=["a", "b"])
2362        tm.assert_frame_equal(result, exp)
2363
2364        # overlap in index/index_names
2365
2366        data = {"a": [1, 2, 3], "b": [4, 5, 6]}
2367
2368        result = DataFrame.from_records(data, index=["a", "b", "c"])
2369        exp = DataFrame(data, index=["a", "b", "c"])
2370        tm.assert_frame_equal(result, exp)
2371
2372        # GH 2623
2373        rows = []
2374        rows.append([datetime(2010, 1, 1), 1])
2375        rows.append([datetime(2010, 1, 2), "hi"])  # test col upconverts to obj
2376        df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
2377        result = df2_obj.dtypes
2378        expected = Series(
2379            [np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"]
2380        )
2381        tm.assert_series_equal(result, expected)
2382
2383        rows = []
2384        rows.append([datetime(2010, 1, 1), 1])
2385        rows.append([datetime(2010, 1, 2), 1])
2386        df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
2387        result = df2_obj.dtypes
2388        expected = Series(
2389            [np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"]
2390        )
2391        tm.assert_series_equal(result, expected)
2392
2393    def test_from_records_empty(self):
2394        # 3562
2395        result = DataFrame.from_records([], columns=["a", "b", "c"])
2396        expected = DataFrame(columns=["a", "b", "c"])
2397        tm.assert_frame_equal(result, expected)
2398
2399        result = DataFrame.from_records([], columns=["a", "b", "b"])
2400        expected = DataFrame(columns=["a", "b", "b"])
2401        tm.assert_frame_equal(result, expected)
2402
2403    def test_from_records_empty_with_nonempty_fields_gh3682(self):
2404        a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])
2405        df = DataFrame.from_records(a, index="id")
2406        tm.assert_index_equal(df.index, Index([1], name="id"))
2407        assert df.index.name == "id"
2408        tm.assert_index_equal(df.columns, Index(["value"]))
2409
2410        b = np.array([], dtype=[("id", np.int64), ("value", np.int64)])
2411        df = DataFrame.from_records(b, index="id")
2412        tm.assert_index_equal(df.index, Index([], name="id"))
2413        assert df.index.name == "id"
2414
2415    @pytest.mark.parametrize(
2416        "dtype",
2417        tm.ALL_INT_DTYPES
2418        + tm.ALL_EA_INT_DTYPES
2419        + tm.FLOAT_DTYPES
2420        + tm.COMPLEX_DTYPES
2421        + tm.DATETIME64_DTYPES
2422        + tm.TIMEDELTA64_DTYPES
2423        + tm.BOOL_DTYPES,
2424    )
2425    def test_check_dtype_empty_numeric_column(self, dtype):
2426        # GH24386: Ensure dtypes are set correctly for an empty DataFrame.
2427        # Empty DataFrame is generated via dictionary data with non-overlapping columns.
2428        data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype)
2429
2430        assert data.b.dtype == dtype
2431
2432    @pytest.mark.parametrize(
2433        "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES
2434    )
2435    def test_check_dtype_empty_string_column(self, dtype):
2436        # GH24386: Ensure dtypes are set correctly for an empty DataFrame.
2437        # Empty DataFrame is generated via dictionary data with non-overlapping columns.
2438        data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype)
2439
2440        assert data.b.dtype.name == "object"
2441
2442    def test_from_records_with_datetimes(self):
2443
2444        # this may fail on certain platforms because of a numpy issue
2445        # related GH6140
2446        if not is_platform_little_endian():
2447            pytest.skip("known failure of test on non-little endian")
2448
2449        # construction with a null in a recarray
2450        # GH 6140
2451        expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]})
2452
2453        arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
2454        dtypes = [("EXPIRY", "<M8[ns]")]
2455
2456        try:
2457            recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
2458        except (ValueError):
2459            pytest.skip("known failure of numpy rec array creation")
2460
2461        result = DataFrame.from_records(recarray)
2462        tm.assert_frame_equal(result, expected)
2463
2464        # coercion should work too
2465        arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
2466        dtypes = [("EXPIRY", "<M8[m]")]
2467        recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
2468        result = DataFrame.from_records(recarray)
2469        tm.assert_frame_equal(result, expected)
2470
2471    def test_from_records_sequencelike(self):
2472        df = DataFrame(
2473            {
2474                "A": np.array(np.random.randn(6), dtype=np.float64),
2475                "A1": np.array(np.random.randn(6), dtype=np.float64),
2476                "B": np.array(np.arange(6), dtype=np.int64),
2477                "C": ["foo"] * 6,
2478                "D": np.array([True, False] * 3, dtype=bool),
2479                "E": np.array(np.random.randn(6), dtype=np.float32),
2480                "E1": np.array(np.random.randn(6), dtype=np.float32),
2481                "F": np.array(np.arange(6), dtype=np.int32),
2482            }
2483        )
2484
2485        # this is actually tricky to create the recordlike arrays and
2486        # have the dtypes be intact
2487        blocks = df._to_dict_of_blocks()
2488        tuples = []
2489        columns = []
2490        dtypes = []
2491        for dtype, b in blocks.items():
2492            columns.extend(b.columns)
2493            dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])
2494        for i in range(len(df.index)):
2495            tup = []
2496            for _, b in blocks.items():
2497                tup.extend(b.iloc[i].values)
2498            tuples.append(tuple(tup))
2499
2500        recarray = np.array(tuples, dtype=dtypes).view(np.recarray)
2501        recarray2 = df.to_records()
2502        lists = [list(x) for x in tuples]
2503
2504        # tuples (lose the dtype info)
2505        result = DataFrame.from_records(tuples, columns=columns).reindex(
2506            columns=df.columns
2507        )
2508
2509        # created recarray and with to_records recarray (have dtype info)
2510        result2 = DataFrame.from_records(recarray, columns=columns).reindex(
2511            columns=df.columns
2512        )
2513        result3 = DataFrame.from_records(recarray2, columns=columns).reindex(
2514            columns=df.columns
2515        )
2516
2517        # list of tupels (no dtype info)
2518        result4 = DataFrame.from_records(lists, columns=columns).reindex(
2519            columns=df.columns
2520        )
2521
2522        tm.assert_frame_equal(result, df, check_dtype=False)
2523        tm.assert_frame_equal(result2, df)
2524        tm.assert_frame_equal(result3, df)
2525        tm.assert_frame_equal(result4, df, check_dtype=False)
2526
2527        # tuples is in the order of the columns
2528        result = DataFrame.from_records(tuples)
2529        tm.assert_index_equal(result.columns, RangeIndex(8))
2530
2531        # test exclude parameter & we are casting the results here (as we don't
2532        # have dtype info to recover)
2533        columns_to_test = [columns.index("C"), columns.index("E1")]
2534
2535        exclude = list(set(range(8)) - set(columns_to_test))
2536        result = DataFrame.from_records(tuples, exclude=exclude)
2537        result.columns = [columns[i] for i in sorted(columns_to_test)]
2538        tm.assert_series_equal(result["C"], df["C"])
2539        tm.assert_series_equal(result["E1"], df["E1"].astype("float64"))
2540
2541        # empty case
2542        result = DataFrame.from_records([], columns=["foo", "bar", "baz"])
2543        assert len(result) == 0
2544        tm.assert_index_equal(result.columns, Index(["foo", "bar", "baz"]))
2545
2546        result = DataFrame.from_records([])
2547        assert len(result) == 0
2548        assert len(result.columns) == 0
2549
2550    def test_from_records_dictlike(self):
2551
2552        # test the dict methods
2553        df = DataFrame(
2554            {
2555                "A": np.array(np.random.randn(6), dtype=np.float64),
2556                "A1": np.array(np.random.randn(6), dtype=np.float64),
2557                "B": np.array(np.arange(6), dtype=np.int64),
2558                "C": ["foo"] * 6,
2559                "D": np.array([True, False] * 3, dtype=bool),
2560                "E": np.array(np.random.randn(6), dtype=np.float32),
2561                "E1": np.array(np.random.randn(6), dtype=np.float32),
2562                "F": np.array(np.arange(6), dtype=np.int32),
2563            }
2564        )
2565
2566        # columns is in a different order here than the actual items iterated
2567        # from the dict
2568        blocks = df._to_dict_of_blocks()
2569        columns = []
2570        for dtype, b in blocks.items():
2571            columns.extend(b.columns)
2572
2573        asdict = {x: y for x, y in df.items()}
2574        asdict2 = {x: y.values for x, y in df.items()}
2575
2576        # dict of series & dict of ndarrays (have dtype info)
2577        results = []
2578        results.append(DataFrame.from_records(asdict).reindex(columns=df.columns))
2579        results.append(
2580            DataFrame.from_records(asdict, columns=columns).reindex(columns=df.columns)
2581        )
2582        results.append(
2583            DataFrame.from_records(asdict2, columns=columns).reindex(columns=df.columns)
2584        )
2585
2586        for r in results:
2587            tm.assert_frame_equal(r, df)
2588
2589    def test_from_records_with_index_data(self):
2590        df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])
2591
2592        data = np.random.randn(10)
2593        df1 = DataFrame.from_records(df, index=data)
2594        tm.assert_index_equal(df1.index, Index(data))
2595
2596    def test_from_records_bad_index_column(self):
2597        df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])
2598
2599        # should pass
2600        df1 = DataFrame.from_records(df, index=["C"])
2601        tm.assert_index_equal(df1.index, Index(df.C))
2602
2603        df1 = DataFrame.from_records(df, index="C")
2604        tm.assert_index_equal(df1.index, Index(df.C))
2605
2606        # should fail
2607        msg = r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)"
2608        with pytest.raises(ValueError, match=msg):
2609            DataFrame.from_records(df, index=[2])
2610        with pytest.raises(KeyError, match=r"^2$"):
2611            DataFrame.from_records(df, index=2)
2612
2613    def test_from_records_non_tuple(self):
2614        class Record:
2615            def __init__(self, *args):
2616                self.args = args
2617
2618            def __getitem__(self, i):
2619                return self.args[i]
2620
2621            def __iter__(self):
2622                return iter(self.args)
2623
2624        recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)]
2625        tups = [tuple(rec) for rec in recs]
2626
2627        result = DataFrame.from_records(recs)
2628        expected = DataFrame.from_records(tups)
2629        tm.assert_frame_equal(result, expected)
2630
2631    def test_from_records_len0_with_columns(self):
2632        # #2633
2633        result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])
2634        expected = Index(["bar"])
2635
2636        assert len(result) == 0
2637        assert result.index.name == "foo"
2638        tm.assert_index_equal(result.columns, expected)
2639
2640    def test_from_records_series_list_dict(self):
2641        # GH27358
2642        expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T
2643        data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]])
2644        result = DataFrame.from_records(data)
2645        tm.assert_frame_equal(result, expected)
2646
2647    def test_from_records_series_categorical_index(self):
2648        # GH 32805
2649        index = CategoricalIndex(
2650            [Interval(-20, -10), Interval(-10, 0), Interval(0, 10)]
2651        )
2652        series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index)
2653        frame = DataFrame.from_records(series_of_dicts, index=index)
2654        expected = DataFrame(
2655            {"a": [1, 2, np.NaN], "b": [np.NaN, np.NaN, 3]}, index=index
2656        )
2657        tm.assert_frame_equal(frame, expected)
2658
2659    def test_frame_from_records_utc(self):
2660        rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)}
2661
2662        # it works
2663        DataFrame.from_records([rec], index="begin_time")
2664
2665    def test_to_frame_with_falsey_names(self):
2666        # GH 16114
2667        result = Series(name=0, dtype=object).to_frame().dtypes
2668        expected = Series({0: object})
2669        tm.assert_series_equal(result, expected)
2670
2671        result = DataFrame(Series(name=0, dtype=object)).dtypes
2672        tm.assert_series_equal(result, expected)
2673
2674    @pytest.mark.arm_slow
2675    @pytest.mark.parametrize("dtype", [None, "uint8", "category"])
2676    def test_constructor_range_dtype(self, dtype):
2677        expected = DataFrame({"A": [0, 1, 2, 3, 4]}, dtype=dtype or "int64")
2678
2679        # GH 26342
2680        result = DataFrame(range(5), columns=["A"], dtype=dtype)
2681        tm.assert_frame_equal(result, expected)
2682
2683        # GH 16804
2684        result = DataFrame({"A": range(5)}, dtype=dtype)
2685        tm.assert_frame_equal(result, expected)
2686
2687    def test_frame_from_list_subclass(self):
2688        # GH21226
2689        class List(list):
2690            pass
2691
2692        expected = DataFrame([[1, 2, 3], [4, 5, 6]])
2693        result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])]))
2694        tm.assert_frame_equal(result, expected)
2695
2696    @pytest.mark.parametrize(
2697        "extension_arr",
2698        [
2699            Categorical(list("aabbc")),
2700            SparseArray([1, np.nan, np.nan, np.nan]),
2701            IntervalArray([Interval(0, 1), Interval(1, 5)]),
2702            PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")),
2703        ],
2704    )
2705    def test_constructor_with_extension_array(self, extension_arr):
2706        # GH11363
2707        expected = DataFrame(Series(extension_arr))
2708        result = DataFrame(extension_arr)
2709        tm.assert_frame_equal(result, expected)
2710
2711    def test_datetime_date_tuple_columns_from_dict(self):
2712        # GH 10863
2713        v = date.today()
2714        tup = v, v
2715        result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup])
2716        expected = DataFrame([0, 1, 2], columns=Index(Series([tup])))
2717        tm.assert_frame_equal(result, expected)
2718
2719    def test_construct_with_two_categoricalindex_series(self):
2720        # GH 14600
2721        s1 = Series([39, 6, 4], index=CategoricalIndex(["female", "male", "unknown"]))
2722        s2 = Series(
2723            [2, 152, 2, 242, 150],
2724            index=CategoricalIndex(["f", "female", "m", "male", "unknown"]),
2725        )
2726        result = DataFrame([s1, s2])
2727        expected = DataFrame(
2728            np.array(
2729                [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]]
2730            ),
2731            columns=["f", "female", "m", "male", "unknown"],
2732        )
2733        tm.assert_frame_equal(result, expected)
2734
2735    def test_from_M8_structured(self):
2736        dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
2737        arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")])
2738        df = DataFrame(arr)
2739
2740        assert df["Date"][0] == dates[0][0]
2741        assert df["Forecasting"][0] == dates[0][1]
2742
2743        s = Series(arr["Date"])
2744        assert isinstance(s[0], Timestamp)
2745        assert s[0] == dates[0][0]
2746
2747    def test_from_datetime_subclass(self):
2748        # GH21142 Verify whether Datetime subclasses are also of dtype datetime
2749        class DatetimeSubclass(datetime):
2750            pass
2751
2752        data = DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]})
2753        assert data.datetime.dtype == "datetime64[ns]"
2754
2755    def test_with_mismatched_index_length_raises(self):
2756        # GH#33437
2757        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
2758        with pytest.raises(ValueError, match="Shape of passed values"):
2759            DataFrame(dti, index=range(4))
2760
2761    def test_frame_ctor_datetime64_column(self):
2762        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
2763        dates = np.asarray(rng)
2764
2765        df = DataFrame({"A": np.random.randn(len(rng)), "B": dates})
2766        assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]"))
2767
2768    def test_dataframe_constructor_infer_multiindex(self):
2769        index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]]
2770
2771        multi = DataFrame(
2772            np.random.randn(4, 4),
2773            index=[np.array(x) for x in index_lists],
2774        )
2775        assert isinstance(multi.index, MultiIndex)
2776        assert not isinstance(multi.columns, MultiIndex)
2777
2778        multi = DataFrame(np.random.randn(4, 4), columns=index_lists)
2779        assert isinstance(multi.columns, MultiIndex)
2780
2781    @pytest.mark.parametrize(
2782        "input_vals",
2783        [
2784            ([1, 2]),
2785            (["1", "2"]),
2786            (list(date_range("1/1/2011", periods=2, freq="H"))),
2787            (list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))),
2788            ([Interval(left=0, right=5)]),
2789        ],
2790    )
2791    def test_constructor_list_str(self, input_vals, string_dtype):
2792        # GH#16605
2793        # Ensure that data elements are converted to strings when
2794        # dtype is str, 'str', or 'U'
2795
2796        result = DataFrame({"A": input_vals}, dtype=string_dtype)
2797        expected = DataFrame({"A": input_vals}).astype({"A": string_dtype})
2798        tm.assert_frame_equal(result, expected)
2799
2800    def test_constructor_list_str_na(self, string_dtype):
2801
2802        result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype)
2803        expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object)
2804        tm.assert_frame_equal(result, expected)
2805
2806
2807class TestDataFrameConstructorWithDatetimeTZ:
2808    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
2809    def test_construction_preserves_tzaware_dtypes(self, tz):
2810        # after GH#7822
2811        # these retain the timezones on dict construction
2812        dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
2813        dr_tz = dr.tz_localize(tz)
2814        df = DataFrame({"A": "foo", "B": dr_tz}, index=dr)
2815        tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo)
2816        assert df["B"].dtype == tz_expected
2817
2818        # GH#2810 (with timezones)
2819        datetimes_naive = [ts.to_pydatetime() for ts in dr]
2820        datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz]
2821        df = DataFrame({"dr": dr})
2822        df["dr_tz"] = dr_tz
2823        df["datetimes_naive"] = datetimes_naive
2824        df["datetimes_with_tz"] = datetimes_with_tz
2825        result = df.dtypes
2826        expected = Series(
2827            [
2828                np.dtype("datetime64[ns]"),
2829                DatetimeTZDtype(tz=tz),
2830                np.dtype("datetime64[ns]"),
2831                DatetimeTZDtype(tz=tz),
2832            ],
2833            index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"],
2834        )
2835        tm.assert_series_equal(result, expected)
2836
2837    def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
2838        # GH#25843
2839        tz = tz_aware_fixture
2840        result = DataFrame({"d": [Timestamp("2019", tz=tz)]}, dtype="datetime64[ns]")
2841        expected = DataFrame({"d": [Timestamp("2019")]})
2842        tm.assert_frame_equal(result, expected)
2843
2844    def test_from_dict(self):
2845
2846        # 8260
2847        # support datetime64 with tz
2848
2849        idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
2850        dr = date_range("20130110", periods=3)
2851
2852        # construction
2853        df = DataFrame({"A": idx, "B": dr})
2854        assert df["A"].dtype, "M8[ns, US/Eastern"
2855        assert df["A"].name == "A"
2856        tm.assert_series_equal(df["A"], Series(idx, name="A"))
2857        tm.assert_series_equal(df["B"], Series(dr, name="B"))
2858
2859    def test_from_index(self):
2860
2861        # from index
2862        idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
2863        df2 = DataFrame(idx2)
2864        tm.assert_series_equal(df2["foo"], Series(idx2, name="foo"))
2865        df2 = DataFrame(Series(idx2))
2866        tm.assert_series_equal(df2["foo"], Series(idx2, name="foo"))
2867
2868        idx2 = date_range("20130101", periods=3, tz="US/Eastern")
2869        df2 = DataFrame(idx2)
2870        tm.assert_series_equal(df2[0], Series(idx2, name=0))
2871        df2 = DataFrame(Series(idx2))
2872        tm.assert_series_equal(df2[0], Series(idx2, name=0))
2873
2874    def test_frame_dict_constructor_datetime64_1680(self):
2875        dr = date_range("1/1/2012", periods=10)
2876        s = Series(dr, index=dr)
2877
2878        # it works!
2879        DataFrame({"a": "foo", "b": s}, index=dr)
2880        DataFrame({"a": "foo", "b": s.values}, index=dr)
2881
2882    def test_frame_datetime64_mixed_index_ctor_1681(self):
2883        dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
2884        ts = Series(dr)
2885
2886        # it works!
2887        d = DataFrame({"A": "foo", "B": ts}, index=dr)
2888        assert d["B"].isna().all()
2889
2890    def test_frame_timeseries_to_records(self):
2891        index = date_range("1/1/2000", periods=10)
2892        df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"])
2893
2894        result = df.to_records()
2895        result["index"].dtype == "M8[ns]"
2896
2897        result = df.to_records(index=False)
2898
2899    def test_frame_timeseries_column(self):
2900        # GH19157
2901        dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern")
2902        result = DataFrame(dr, columns=["timestamps"])
2903        expected = DataFrame(
2904            {
2905                "timestamps": [
2906                    Timestamp("20130101T10:00:00", tz="US/Eastern"),
2907                    Timestamp("20130101T10:01:00", tz="US/Eastern"),
2908                    Timestamp("20130101T10:02:00", tz="US/Eastern"),
2909                ]
2910            }
2911        )
2912        tm.assert_frame_equal(result, expected)
2913
2914    def test_nested_dict_construction(self):
2915        # GH22227
2916        columns = ["Nevada", "Ohio"]
2917        pop = {
2918            "Nevada": {2001: 2.4, 2002: 2.9},
2919            "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
2920        }
2921        result = DataFrame(pop, index=[2001, 2002, 2003], columns=columns)
2922        expected = DataFrame(
2923            [(2.4, 1.7), (2.9, 3.6), (np.nan, np.nan)],
2924            columns=columns,
2925            index=Index([2001, 2002, 2003]),
2926        )
2927        tm.assert_frame_equal(result, expected)
2928
2929    def test_from_tzaware_object_array(self):
2930        # GH#26825 2D object array of tzaware timestamps should not raise
2931        dti = date_range("2016-04-05 04:30", periods=3, tz="UTC")
2932        data = dti._data.astype(object).reshape(1, -1)
2933        df = DataFrame(data)
2934        assert df.shape == (1, 3)
2935        assert (df.dtypes == dti.dtype).all()
2936        assert (df == dti).all().all()
2937
2938    def test_from_tzaware_mixed_object_array(self):
2939        # GH#26825
2940        arr = np.array(
2941            [
2942                [
2943                    Timestamp("2013-01-01 00:00:00"),
2944                    Timestamp("2013-01-02 00:00:00"),
2945                    Timestamp("2013-01-03 00:00:00"),
2946                ],
2947                [
2948                    Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"),
2949                    pd.NaT,
2950                    Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"),
2951                ],
2952                [
2953                    Timestamp("2013-01-01 00:00:00+0100", tz="CET"),
2954                    pd.NaT,
2955                    Timestamp("2013-01-03 00:00:00+0100", tz="CET"),
2956                ],
2957            ],
2958            dtype=object,
2959        ).T
2960        res = DataFrame(arr, columns=["A", "B", "C"])
2961
2962        expected_dtypes = [
2963            "datetime64[ns]",
2964            "datetime64[ns, US/Eastern]",
2965            "datetime64[ns, CET]",
2966        ]
2967        assert (res.dtypes == expected_dtypes).all()
2968
2969    def test_from_2d_ndarray_with_dtype(self):
2970        # GH#12513
2971        array_dim2 = np.arange(10).reshape((5, 2))
2972        df = DataFrame(array_dim2, dtype="datetime64[ns, UTC]")
2973
2974        expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]")
2975        tm.assert_frame_equal(df, expected)
2976
2977    def test_construction_from_set_raises(self):
2978        # https://github.com/pandas-dev/pandas/issues/32582
2979        msg = "Set type is unordered"
2980        with pytest.raises(TypeError, match=msg):
2981            DataFrame({"a": {1, 2, 3}})
2982