1import numpy as np
2import pytest
3
4import pandas as pd
5from pandas import Categorical, DataFrame, Index, Series
6import pandas._testing as tm
7
8
9class TestConcatAppendCommon:
10    """
11    Test common dtype coercion rules between concat and append.
12    """
13
14    def setup_method(self, method):
15
16        dt_data = [
17            pd.Timestamp("2011-01-01"),
18            pd.Timestamp("2011-01-02"),
19            pd.Timestamp("2011-01-03"),
20        ]
21        tz_data = [
22            pd.Timestamp("2011-01-01", tz="US/Eastern"),
23            pd.Timestamp("2011-01-02", tz="US/Eastern"),
24            pd.Timestamp("2011-01-03", tz="US/Eastern"),
25        ]
26
27        td_data = [
28            pd.Timedelta("1 days"),
29            pd.Timedelta("2 days"),
30            pd.Timedelta("3 days"),
31        ]
32
33        period_data = [
34            pd.Period("2011-01", freq="M"),
35            pd.Period("2011-02", freq="M"),
36            pd.Period("2011-03", freq="M"),
37        ]
38
39        self.data = {
40            "bool": [True, False, True],
41            "int64": [1, 2, 3],
42            "float64": [1.1, np.nan, 3.3],
43            "category": Categorical(["X", "Y", "Z"]),
44            "object": ["a", "b", "c"],
45            "datetime64[ns]": dt_data,
46            "datetime64[ns, US/Eastern]": tz_data,
47            "timedelta64[ns]": td_data,
48            "period[M]": period_data,
49        }
50
51    def _check_expected_dtype(self, obj, label):
52        """
53        Check whether obj has expected dtype depending on label
54        considering not-supported dtypes
55        """
56        if isinstance(obj, Index):
57            if label == "bool":
58                assert obj.dtype == "object"
59            else:
60                assert obj.dtype == label
61        elif isinstance(obj, Series):
62            if label.startswith("period"):
63                assert obj.dtype == "Period[M]"
64            else:
65                assert obj.dtype == label
66        else:
67            raise ValueError
68
69    def test_dtypes(self):
70        # to confirm test case covers intended dtypes
71        for typ, vals in self.data.items():
72            self._check_expected_dtype(Index(vals), typ)
73            self._check_expected_dtype(Series(vals), typ)
74
75    def test_concatlike_same_dtypes(self):
76        # GH 13660
77        for typ1, vals1 in self.data.items():
78
79            vals2 = vals1
80            vals3 = vals1
81
82            if typ1 == "category":
83                exp_data = Categorical(list(vals1) + list(vals2))
84                exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
85            else:
86                exp_data = vals1 + vals2
87                exp_data3 = vals1 + vals2 + vals3
88
89            # ----- Index ----- #
90
91            # index.append
92            res = Index(vals1).append(Index(vals2))
93            exp = Index(exp_data)
94            tm.assert_index_equal(res, exp)
95
96            # 3 elements
97            res = Index(vals1).append([Index(vals2), Index(vals3)])
98            exp = Index(exp_data3)
99            tm.assert_index_equal(res, exp)
100
101            # index.append name mismatch
102            i1 = Index(vals1, name="x")
103            i2 = Index(vals2, name="y")
104            res = i1.append(i2)
105            exp = Index(exp_data)
106            tm.assert_index_equal(res, exp)
107
108            # index.append name match
109            i1 = Index(vals1, name="x")
110            i2 = Index(vals2, name="x")
111            res = i1.append(i2)
112            exp = Index(exp_data, name="x")
113            tm.assert_index_equal(res, exp)
114
115            # cannot append non-index
116            with pytest.raises(TypeError, match="all inputs must be Index"):
117                Index(vals1).append(vals2)
118
119            with pytest.raises(TypeError, match="all inputs must be Index"):
120                Index(vals1).append([Index(vals2), vals3])
121
122            # ----- Series ----- #
123
124            # series.append
125            res = Series(vals1).append(Series(vals2), ignore_index=True)
126            exp = Series(exp_data)
127            tm.assert_series_equal(res, exp, check_index_type=True)
128
129            # concat
130            res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
131            tm.assert_series_equal(res, exp, check_index_type=True)
132
133            # 3 elements
134            res = Series(vals1).append(
135                [Series(vals2), Series(vals3)], ignore_index=True
136            )
137            exp = Series(exp_data3)
138            tm.assert_series_equal(res, exp)
139
140            res = pd.concat(
141                [Series(vals1), Series(vals2), Series(vals3)],
142                ignore_index=True,
143            )
144            tm.assert_series_equal(res, exp)
145
146            # name mismatch
147            s1 = Series(vals1, name="x")
148            s2 = Series(vals2, name="y")
149            res = s1.append(s2, ignore_index=True)
150            exp = Series(exp_data)
151            tm.assert_series_equal(res, exp, check_index_type=True)
152
153            res = pd.concat([s1, s2], ignore_index=True)
154            tm.assert_series_equal(res, exp, check_index_type=True)
155
156            # name match
157            s1 = Series(vals1, name="x")
158            s2 = Series(vals2, name="x")
159            res = s1.append(s2, ignore_index=True)
160            exp = Series(exp_data, name="x")
161            tm.assert_series_equal(res, exp, check_index_type=True)
162
163            res = pd.concat([s1, s2], ignore_index=True)
164            tm.assert_series_equal(res, exp, check_index_type=True)
165
166            # cannot append non-index
167            msg = (
168                r"cannot concatenate object of type '.+'; "
169                "only Series and DataFrame objs are valid"
170            )
171            with pytest.raises(TypeError, match=msg):
172                Series(vals1).append(vals2)
173
174            with pytest.raises(TypeError, match=msg):
175                Series(vals1).append([Series(vals2), vals3])
176
177            with pytest.raises(TypeError, match=msg):
178                pd.concat([Series(vals1), vals2])
179
180            with pytest.raises(TypeError, match=msg):
181                pd.concat([Series(vals1), Series(vals2), vals3])
182
183    def test_concatlike_dtypes_coercion(self):
184        # GH 13660
185        for typ1, vals1 in self.data.items():
186            for typ2, vals2 in self.data.items():
187
188                vals3 = vals2
189
190                # basically infer
191                exp_index_dtype = None
192                exp_series_dtype = None
193
194                if typ1 == typ2:
195                    # same dtype is tested in test_concatlike_same_dtypes
196                    continue
197                elif typ1 == "category" or typ2 == "category":
198                    # TODO: suspicious
199                    continue
200
201                # specify expected dtype
202                if typ1 == "bool" and typ2 in ("int64", "float64"):
203                    # series coerces to numeric based on numpy rule
204                    # index doesn't because bool is object dtype
205                    exp_series_dtype = typ2
206                elif typ2 == "bool" and typ1 in ("int64", "float64"):
207                    exp_series_dtype = typ1
208                elif (
209                    typ1 == "datetime64[ns, US/Eastern]"
210                    or typ2 == "datetime64[ns, US/Eastern]"
211                    or typ1 == "timedelta64[ns]"
212                    or typ2 == "timedelta64[ns]"
213                ):
214                    exp_index_dtype = object
215                    exp_series_dtype = object
216
217                exp_data = vals1 + vals2
218                exp_data3 = vals1 + vals2 + vals3
219
220                # ----- Index ----- #
221
222                # index.append
223                res = Index(vals1).append(Index(vals2))
224                exp = Index(exp_data, dtype=exp_index_dtype)
225                tm.assert_index_equal(res, exp)
226
227                # 3 elements
228                res = Index(vals1).append([Index(vals2), Index(vals3)])
229                exp = Index(exp_data3, dtype=exp_index_dtype)
230                tm.assert_index_equal(res, exp)
231
232                # ----- Series ----- #
233
234                # series.append
235                res = Series(vals1).append(Series(vals2), ignore_index=True)
236                exp = Series(exp_data, dtype=exp_series_dtype)
237                tm.assert_series_equal(res, exp, check_index_type=True)
238
239                # concat
240                res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
241                tm.assert_series_equal(res, exp, check_index_type=True)
242
243                # 3 elements
244                res = Series(vals1).append(
245                    [Series(vals2), Series(vals3)], ignore_index=True
246                )
247                exp = Series(exp_data3, dtype=exp_series_dtype)
248                tm.assert_series_equal(res, exp)
249
250                res = pd.concat(
251                    [Series(vals1), Series(vals2), Series(vals3)],
252                    ignore_index=True,
253                )
254                tm.assert_series_equal(res, exp)
255
256    def test_concatlike_common_coerce_to_pandas_object(self):
257        # GH 13626
258        # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
259        dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
260        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
261
262        exp = Index(
263            [
264                pd.Timestamp("2011-01-01"),
265                pd.Timestamp("2011-01-02"),
266                pd.Timedelta("1 days"),
267                pd.Timedelta("2 days"),
268            ]
269        )
270
271        res = dti.append(tdi)
272        tm.assert_index_equal(res, exp)
273        assert isinstance(res[0], pd.Timestamp)
274        assert isinstance(res[-1], pd.Timedelta)
275
276        dts = Series(dti)
277        tds = Series(tdi)
278        res = dts.append(tds)
279        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
280        assert isinstance(res.iloc[0], pd.Timestamp)
281        assert isinstance(res.iloc[-1], pd.Timedelta)
282
283        res = pd.concat([dts, tds])
284        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
285        assert isinstance(res.iloc[0], pd.Timestamp)
286        assert isinstance(res.iloc[-1], pd.Timedelta)
287
288    def test_concatlike_datetimetz(self, tz_aware_fixture):
289        tz = tz_aware_fixture
290        # GH 7795
291        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
292        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)
293
294        exp = pd.DatetimeIndex(
295            ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
296        )
297
298        res = dti1.append(dti2)
299        tm.assert_index_equal(res, exp)
300
301        dts1 = Series(dti1)
302        dts2 = Series(dti2)
303        res = dts1.append(dts2)
304        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
305
306        res = pd.concat([dts1, dts2])
307        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
308
309    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
310    def test_concatlike_datetimetz_short(self, tz):
311        # GH#7795
312        ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
313        ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
314        df1 = DataFrame(0, index=ix1, columns=["A", "B"])
315        df2 = DataFrame(0, index=ix2, columns=["A", "B"])
316
317        exp_idx = pd.DatetimeIndex(
318            ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
319            tz=tz,
320        )
321        exp = DataFrame(0, index=exp_idx, columns=["A", "B"])
322
323        tm.assert_frame_equal(df1.append(df2), exp)
324        tm.assert_frame_equal(pd.concat([df1, df2]), exp)
325
326    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
327        tz = tz_aware_fixture
328        # GH 13660
329
330        # different tz coerces to object
331        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
332        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])
333
334        exp = Index(
335            [
336                pd.Timestamp("2011-01-01", tz=tz),
337                pd.Timestamp("2011-01-02", tz=tz),
338                pd.Timestamp("2012-01-01"),
339                pd.Timestamp("2012-01-02"),
340            ],
341            dtype=object,
342        )
343
344        res = dti1.append(dti2)
345        tm.assert_index_equal(res, exp)
346
347        dts1 = Series(dti1)
348        dts2 = Series(dti2)
349        res = dts1.append(dts2)
350        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
351
352        res = pd.concat([dts1, dts2])
353        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
354
355        # different tz
356        dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")
357
358        exp = Index(
359            [
360                pd.Timestamp("2011-01-01", tz=tz),
361                pd.Timestamp("2011-01-02", tz=tz),
362                pd.Timestamp("2012-01-01", tz="US/Pacific"),
363                pd.Timestamp("2012-01-02", tz="US/Pacific"),
364            ],
365            dtype=object,
366        )
367
368        res = dti1.append(dti3)
369        # tm.assert_index_equal(res, exp)
370
371        dts1 = Series(dti1)
372        dts3 = Series(dti3)
373        res = dts1.append(dts3)
374        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
375
376        res = pd.concat([dts1, dts3])
377        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
378
379    def test_concatlike_common_period(self):
380        # GH 13660
381        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
382        pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")
383
384        exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")
385
386        res = pi1.append(pi2)
387        tm.assert_index_equal(res, exp)
388
389        ps1 = Series(pi1)
390        ps2 = Series(pi2)
391        res = ps1.append(ps2)
392        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
393
394        res = pd.concat([ps1, ps2])
395        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
396
397    def test_concatlike_common_period_diff_freq_to_object(self):
398        # GH 13221
399        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
400        pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")
401
402        exp = Index(
403            [
404                pd.Period("2011-01", freq="M"),
405                pd.Period("2011-02", freq="M"),
406                pd.Period("2012-01-01", freq="D"),
407                pd.Period("2012-02-01", freq="D"),
408            ],
409            dtype=object,
410        )
411
412        res = pi1.append(pi2)
413        tm.assert_index_equal(res, exp)
414
415        ps1 = Series(pi1)
416        ps2 = Series(pi2)
417        res = ps1.append(ps2)
418        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
419
420        res = pd.concat([ps1, ps2])
421        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
422
423    def test_concatlike_common_period_mixed_dt_to_object(self):
424        # GH 13221
425        # different datetimelike
426        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
427        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
428        exp = Index(
429            [
430                pd.Period("2011-01", freq="M"),
431                pd.Period("2011-02", freq="M"),
432                pd.Timedelta("1 days"),
433                pd.Timedelta("2 days"),
434            ],
435            dtype=object,
436        )
437
438        res = pi1.append(tdi)
439        tm.assert_index_equal(res, exp)
440
441        ps1 = Series(pi1)
442        tds = Series(tdi)
443        res = ps1.append(tds)
444        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
445
446        res = pd.concat([ps1, tds])
447        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
448
449        # inverse
450        exp = Index(
451            [
452                pd.Timedelta("1 days"),
453                pd.Timedelta("2 days"),
454                pd.Period("2011-01", freq="M"),
455                pd.Period("2011-02", freq="M"),
456            ],
457            dtype=object,
458        )
459
460        res = tdi.append(pi1)
461        tm.assert_index_equal(res, exp)
462
463        ps1 = Series(pi1)
464        tds = Series(tdi)
465        res = tds.append(ps1)
466        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
467
468        res = pd.concat([tds, ps1])
469        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
470
471    def test_concat_categorical(self):
472        # GH 13524
473
474        # same categories -> category
475        s1 = Series([1, 2, np.nan], dtype="category")
476        s2 = Series([2, 1, 2], dtype="category")
477
478        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
479        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
480        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
481
482        # partially different categories => not-category
483        s1 = Series([3, 2], dtype="category")
484        s2 = Series([2, 1], dtype="category")
485
486        exp = Series([3, 2, 2, 1])
487        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
488        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
489
490        # completely different categories (same dtype) => not-category
491        s1 = Series([10, 11, np.nan], dtype="category")
492        s2 = Series([np.nan, 1, 3, 2], dtype="category")
493
494        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object")
495        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
496        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
497
498    def test_union_categorical_same_categories_different_order(self):
499        # https://github.com/pandas-dev/pandas/issues/19096
500        a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
501        b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
502        result = pd.concat([a, b], ignore_index=True)
503        expected = Series(
504            Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
505        )
506        tm.assert_series_equal(result, expected)
507
508    def test_concat_categorical_coercion(self):
509        # GH 13524
510
511        # category + not-category => not-category
512        s1 = Series([1, 2, np.nan], dtype="category")
513        s2 = Series([2, 1, 2])
514
515        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object")
516        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
517        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
518
519        # result shouldn't be affected by 1st elem dtype
520        exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object")
521        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
522        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
523
524        # all values are not in category => not-category
525        s1 = Series([3, 2], dtype="category")
526        s2 = Series([2, 1])
527
528        exp = Series([3, 2, 2, 1])
529        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
530        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
531
532        exp = Series([2, 1, 3, 2])
533        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
534        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
535
536        # completely different categories => not-category
537        s1 = Series([10, 11, np.nan], dtype="category")
538        s2 = Series([1, 3, 2])
539
540        exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object")
541        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
542        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
543
544        exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object")
545        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
546        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
547
548        # different dtype => not-category
549        s1 = Series([10, 11, np.nan], dtype="category")
550        s2 = Series(["a", "b", "c"])
551
552        exp = Series([10, 11, np.nan, "a", "b", "c"])
553        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
554        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
555
556        exp = Series(["a", "b", "c", 10, 11, np.nan])
557        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
558        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
559
560        # if normal series only contains NaN-likes => not-category
561        s1 = Series([10, 11], dtype="category")
562        s2 = Series([np.nan, np.nan, np.nan])
563
564        exp = Series([10, 11, np.nan, np.nan, np.nan])
565        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
566        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
567
568        exp = Series([np.nan, np.nan, np.nan, 10, 11])
569        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
570        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
571
572    def test_concat_categorical_3elem_coercion(self):
573        # GH 13524
574
575        # mixed dtypes => not-category
576        s1 = Series([1, 2, np.nan], dtype="category")
577        s2 = Series([2, 1, 2], dtype="category")
578        s3 = Series([1, 2, 1, 2, np.nan])
579
580        exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
581        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
582        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
583
584        exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
585        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
586        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
587
588        # values are all in either category => not-category
589        s1 = Series([4, 5, 6], dtype="category")
590        s2 = Series([1, 2, 3], dtype="category")
591        s3 = Series([1, 3, 4])
592
593        exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
594        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
595        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
596
597        exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
598        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
599        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
600
601        # values are all in either category => not-category
602        s1 = Series([4, 5, 6], dtype="category")
603        s2 = Series([1, 2, 3], dtype="category")
604        s3 = Series([10, 11, 12])
605
606        exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
607        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
608        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
609
610        exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
611        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
612        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
613
614    def test_concat_categorical_multi_coercion(self):
615        # GH 13524
616
617        s1 = Series([1, 3], dtype="category")
618        s2 = Series([3, 4], dtype="category")
619        s3 = Series([2, 3])
620        s4 = Series([2, 2], dtype="category")
621        s5 = Series([1, np.nan])
622        s6 = Series([1, 3, 2], dtype="category")
623
624        # mixed dtype, values are all in categories => not-category
625        exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
626        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
627        tm.assert_series_equal(res, exp)
628        res = s1.append([s2, s3, s4, s5, s6], ignore_index=True)
629        tm.assert_series_equal(res, exp)
630
631        exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
632        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
633        tm.assert_series_equal(res, exp)
634        res = s6.append([s5, s4, s3, s2, s1], ignore_index=True)
635        tm.assert_series_equal(res, exp)
636
637    def test_concat_categorical_ordered(self):
638        # GH 13524
639
640        s1 = Series(Categorical([1, 2, np.nan], ordered=True))
641        s2 = Series(Categorical([2, 1, 2], ordered=True))
642
643        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
644        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
645        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
646
647        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
648        tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
649        tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)
650
651    def test_concat_categorical_coercion_nan(self):
652        # GH 13524
653
654        # some edge cases
655        # category + not-category => not category
656        s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
657        s2 = Series([np.nan, 1])
658
659        exp = Series([np.nan, np.nan, np.nan, 1])
660        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
661        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
662
663        s1 = Series([1, np.nan], dtype="category")
664        s2 = Series([np.nan, np.nan])
665
666        exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
667        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
668        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
669
670        # mixed dtype, all nan-likes => not-category
671        s1 = Series([np.nan, np.nan], dtype="category")
672        s2 = Series([np.nan, np.nan])
673
674        exp = Series([np.nan, np.nan, np.nan, np.nan])
675        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
676        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
677        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
678        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
679
680        # all category nan-likes => category
681        s1 = Series([np.nan, np.nan], dtype="category")
682        s2 = Series([np.nan, np.nan], dtype="category")
683
684        exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")
685
686        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
687        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
688
689    def test_concat_categorical_empty(self):
690        # GH 13524
691
692        s1 = Series([], dtype="category")
693        s2 = Series([1, 2], dtype="category")
694
695        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
696        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
697
698        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
699        tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)
700
701        s1 = Series([], dtype="category")
702        s2 = Series([], dtype="category")
703
704        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
705        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
706
707        s1 = Series([], dtype="category")
708        s2 = Series([], dtype="object")
709
710        # different dtype => not-category
711        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
712        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
713        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
714        tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)
715
716        s1 = Series([], dtype="category")
717        s2 = Series([np.nan, np.nan])
718
719        # empty Series is ignored
720        exp = Series([np.nan, np.nan])
721        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
722        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
723
724        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
725        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
726
727    def test_categorical_concat_append(self):
728        cat = Categorical(["a", "b"], categories=["a", "b"])
729        vals = [1, 2]
730        df = DataFrame({"cats": cat, "vals": vals})
731        cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
732        vals2 = [1, 2, 1, 2]
733        exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))
734
735        tm.assert_frame_equal(pd.concat([df, df]), exp)
736        tm.assert_frame_equal(df.append(df), exp)
737
738        # GH 13524 can concat different categories
739        cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
740        vals3 = [1, 2]
741        df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
742
743        res = pd.concat([df, df_different_categories], ignore_index=True)
744        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
745        tm.assert_frame_equal(res, exp)
746
747        res = df.append(df_different_categories, ignore_index=True)
748        tm.assert_frame_equal(res, exp)
749