import numpy as np
import pytest

import pandas as pd
from pandas import Categorical, DataFrame, Index, Series
import pandas._testing as tm


class TestConcatAppendCommon:
    """
    Test common dtype coercion rules between concat and append.
    """

    def setup_method(self, method):

        dt_data = [
            pd.Timestamp("2011-01-01"),
            pd.Timestamp("2011-01-02"),
            pd.Timestamp("2011-01-03"),
        ]
        tz_data = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
            pd.Timestamp("2011-01-03", tz="US/Eastern"),
        ]

        td_data = [
            pd.Timedelta("1 days"),
            pd.Timedelta("2 days"),
            pd.Timedelta("3 days"),
        ]

        period_data = [
            pd.Period("2011-01", freq="M"),
            pd.Period("2011-02", freq="M"),
            pd.Period("2011-03", freq="M"),
        ]

        self.data = {
            "bool": [True, False, True],
            "int64": [1, 2, 3],
            "float64": [1.1, np.nan, 3.3],
            "category": Categorical(["X", "Y", "Z"]),
            "object": ["a", "b", "c"],
            "datetime64[ns]": dt_data,
            "datetime64[ns, US/Eastern]": tz_data,
            "timedelta64[ns]": td_data,
            "period[M]": period_data,
        }

    def _check_expected_dtype(self, obj, label):
        """
        Check whether obj has the expected dtype depending on label,
        accounting for not-supported dtypes.
        """
        if isinstance(obj, Index):
            if label == "bool":
                assert obj.dtype == "object"
            else:
                assert obj.dtype == label
        elif isinstance(obj, Series):
            if label.startswith("period"):
                assert obj.dtype == "Period[M]"
            else:
                assert obj.dtype == label
        else:
            raise ValueError

    def test_dtypes(self):
        # confirm that the test data covers the intended dtypes
        for typ, vals in self.data.items():
            self._check_expected_dtype(Index(vals), typ)
            self._check_expected_dtype(Series(vals), typ)
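
    # Illustrative sketch of the rule exercised below (example values are
    # assumptions, not taken from self.data): concatenating like dtype with
    # like dtype should round-trip the dtype, e.g.
    #
    #   s = Series([1, 2, 3])                        # int64
    #   pd.concat([s, s], ignore_index=True).dtype   # -> int64
    #
    # The next test checks this for Index.append, Series.append and pd.concat
    # across every dtype in self.data.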

    def test_concatlike_same_dtypes(self):
        # GH 13660
        for typ1, vals1 in self.data.items():

            vals2 = vals1
            vals3 = vals1

            if typ1 == "category":
                exp_data = Categorical(list(vals1) + list(vals2))
                exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
            else:
                exp_data = vals1 + vals2
                exp_data3 = vals1 + vals2 + vals3

            # ----- Index ----- #

            # index.append
            res = Index(vals1).append(Index(vals2))
            exp = Index(exp_data)
            tm.assert_index_equal(res, exp)

            # 3 elements
            res = Index(vals1).append([Index(vals2), Index(vals3)])
            exp = Index(exp_data3)
            tm.assert_index_equal(res, exp)

            # index.append name mismatch
            i1 = Index(vals1, name="x")
            i2 = Index(vals2, name="y")
            res = i1.append(i2)
            exp = Index(exp_data)
            tm.assert_index_equal(res, exp)

            # index.append name match
            i1 = Index(vals1, name="x")
            i2 = Index(vals2, name="x")
            res = i1.append(i2)
            exp = Index(exp_data, name="x")
            tm.assert_index_equal(res, exp)

            # cannot append non-index
            with pytest.raises(TypeError, match="all inputs must be Index"):
                Index(vals1).append(vals2)

            with pytest.raises(TypeError, match="all inputs must be Index"):
                Index(vals1).append([Index(vals2), vals3])

            # ----- Series ----- #

            # series.append
            res = Series(vals1).append(Series(vals2), ignore_index=True)
            exp = Series(exp_data)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # concat
            res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # 3 elements
            res = Series(vals1).append(
                [Series(vals2), Series(vals3)], ignore_index=True
            )
            exp = Series(exp_data3)
            tm.assert_series_equal(res, exp)

            res = pd.concat(
                [Series(vals1), Series(vals2), Series(vals3)],
                ignore_index=True,
            )
            tm.assert_series_equal(res, exp)

            # name mismatch
            s1 = Series(vals1, name="x")
            s2 = Series(vals2, name="y")
            res = s1.append(s2, ignore_index=True)
            exp = Series(exp_data)
            tm.assert_series_equal(res, exp, check_index_type=True)

            res = pd.concat([s1, s2], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # name match
            s1 = Series(vals1, name="x")
            s2 = Series(vals2, name="x")
            res = s1.append(s2, ignore_index=True)
            exp = Series(exp_data, name="x")
            tm.assert_series_equal(res, exp, check_index_type=True)

            res = pd.concat([s1, s2], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # cannot append non-Series
            msg = (
                r"cannot concatenate object of type '.+'; "
                "only Series and DataFrame objs are valid"
            )
            with pytest.raises(TypeError, match=msg):
                Series(vals1).append(vals2)

            with pytest.raises(TypeError, match=msg):
                Series(vals1).append([Series(vals2), vals3])

            with pytest.raises(TypeError, match=msg):
                pd.concat([Series(vals1), vals2])

            with pytest.raises(TypeError, match=msg):
                pd.concat([Series(vals1), Series(vals2), vals3])

    def test_concatlike_dtypes_coercion(self):
        # GH 13660
        for typ1, vals1 in self.data.items():
            for typ2, vals2 in self.data.items():

                vals3 = vals2

                # basically infer
                exp_index_dtype = None
                exp_series_dtype = None

                if typ1 == typ2:
                    # same dtype is tested in test_concatlike_same_dtypes
                    continue
                elif typ1 == "category" or typ2 == "category":
                    # TODO: suspicious
                    continue

                # specify expected dtype
                if typ1 == "bool" and typ2 in ("int64", "float64"):
                    # series coerces to numeric based on numpy rule
                    # index doesn't because bool is object dtype
                    exp_series_dtype = typ2
                elif typ2 == "bool" and typ1 in ("int64", "float64"):
                    exp_series_dtype = typ1
                elif (
                    typ1 == "datetime64[ns, US/Eastern]"
                    or typ2 == "datetime64[ns, US/Eastern]"
                    or typ1 == "timedelta64[ns]"
                    or typ2 == "timedelta64[ns]"
                ):
                    exp_index_dtype = object
                    exp_series_dtype = object

                exp_data = vals1 + vals2
                exp_data3 = vals1 + vals2 + vals3

                # ----- Index ----- #

                # index.append
                res = Index(vals1).append(Index(vals2))
                exp = Index(exp_data, dtype=exp_index_dtype)
                tm.assert_index_equal(res, exp)

                # 3 elements
                res = Index(vals1).append([Index(vals2), Index(vals3)])
                exp = Index(exp_data3, dtype=exp_index_dtype)
                tm.assert_index_equal(res, exp)

                # ----- Series ----- #

                # series.append
                res = Series(vals1).append(Series(vals2), ignore_index=True)
                exp = Series(exp_data, dtype=exp_series_dtype)
                tm.assert_series_equal(res, exp, check_index_type=True)

                # concat
                res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
                tm.assert_series_equal(res, exp, check_index_type=True)

                # 3 elements
                res = Series(vals1).append(
                    [Series(vals2), Series(vals3)], ignore_index=True
                )
                exp = Series(exp_data3, dtype=exp_series_dtype)
                tm.assert_series_equal(res, exp)

                res = pd.concat(
                    [Series(vals1), Series(vals2), Series(vals3)],
                    ignore_index=True,
                )
                tm.assert_series_equal(res, exp)
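
    # Illustrative sketch of the bool/int asymmetry exercised above; the
    # example values are assumptions, the expected dtypes follow the comments
    # in test_concatlike_dtypes_coercion:
    #
    #   pd.concat([Series([True, False]), Series([1, 2])],
    #             ignore_index=True).dtype
    #   # -> int64: Series follows the numpy promotion rule
    #   Index([True, False]).append(Index([1, 2])).dtype
    #   # -> object: a bool Index is already stored as object dtype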

    def test_concatlike_common_coerce_to_pandas_object(self):
        # GH 13626
        # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
        dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])

        exp = Index(
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ]
        )

        res = dti.append(tdi)
        tm.assert_index_equal(res, exp)
        assert isinstance(res[0], pd.Timestamp)
        assert isinstance(res[-1], pd.Timedelta)

        dts = Series(dti)
        tds = Series(tdi)
        res = dts.append(tds)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

        res = pd.concat([dts, tds])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

    def test_concatlike_datetimetz(self, tz_aware_fixture):
        tz = tz_aware_fixture
        # GH 7795
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)

        exp = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts2 = Series(dti2)
        res = dts1.append(dts2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
    def test_concatlike_datetimetz_short(self, tz):
        # GH#7795
        ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
        ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
        df1 = DataFrame(0, index=ix1, columns=["A", "B"])
        df2 = DataFrame(0, index=ix2, columns=["A", "B"])

        exp_idx = pd.DatetimeIndex(
            ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
            tz=tz,
        )
        exp = DataFrame(0, index=exp_idx, columns=["A", "B"])

        tm.assert_frame_equal(df1.append(df2), exp)
        tm.assert_frame_equal(pd.concat([df1, df2]), exp)

    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
        tz = tz_aware_fixture
        # GH 13660

        # tz-aware + tz-naive coerces to object
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])

        exp = Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01"),
                pd.Timestamp("2012-01-02"),
            ],
            dtype=object,
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts2 = Series(dti2)
        res = dts1.append(dts2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        # two different timezones also coerce to object
        dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")

        exp = Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01", tz="US/Pacific"),
                pd.Timestamp("2012-01-02", tz="US/Pacific"),
            ],
            dtype=object,
        )

        res = dti1.append(dti3)
        # tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts3 = Series(dti3)
        res = dts1.append(dts3)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts3])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
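
    # Note on the tz cases above: appending DatetimeIndexes with different
    # timezones, or tz-aware with tz-naive, does not raise; per the expected
    # values asserted above it coerces to an object-dtype Index/Series whose
    # elements remain pd.Timestamp objects carrying their original tz.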

    def test_concatlike_common_period(self):
        # GH 13660
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")

        exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        ps2 = Series(pi2)
        res = ps1.append(ps2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_diff_freq_to_object(self):
        # GH 13221
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")

        exp = Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Period("2012-01-01", freq="D"),
                pd.Period("2012-02-01", freq="D"),
            ],
            dtype=object,
        )

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        ps2 = Series(pi2)
        res = ps1.append(ps2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_mixed_dt_to_object(self):
        # GH 13221
        # different datetimelike
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
        exp = Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ],
            dtype=object,
        )

        res = pi1.append(tdi)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        tds = Series(tdi)
        res = ps1.append(tds)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, tds])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        # inverse
        exp = Index(
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
            ],
            dtype=object,
        )

        res = tdi.append(pi1)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        tds = Series(tdi)
        res = tds.append(ps1)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([tds, ps1])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
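
    # The categorical tests below exercise GH 13524. A minimal sketch of the
    # headline rule (example values assumed): only identical categories keep
    # the categorical dtype.
    #
    #   c1 = Series(["x", "y"], dtype="category")
    #   c2 = Series(["x", "y"], dtype="category")
    #   pd.concat([c1, c2], ignore_index=True).dtype   # -> category
    #
    #   c3 = Series(["y", "z"], dtype="category")      # different categories
    #   pd.concat([c1, c3], ignore_index=True).dtype   # -> object (not category)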

    def test_concat_categorical(self):
        # GH 13524

        # same categories -> category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2], dtype="category")

        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # partially different categories => not-category
        s1 = Series([3, 2], dtype="category")
        s2 = Series([2, 1], dtype="category")

        exp = Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # completely different categories (same dtype) => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series([np.nan, 1, 3, 2], dtype="category")

        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

    def test_union_categorical_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19096
        a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
        b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
        result = pd.concat([a, b], ignore_index=True)
        expected = Series(
            Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
        )
        tm.assert_series_equal(result, expected)

    def test_concat_categorical_coercion(self):
        # GH 13524

        # category + not-category => not-category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2])

        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # result shouldn't be affected by 1st elem dtype
        exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object")
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # all values are not in category => not-category
        s1 = Series([3, 2], dtype="category")
        s2 = Series([2, 1])

        exp = Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = Series([2, 1, 3, 2])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # completely different categories => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series([1, 3, 2])

        exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object")
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # different dtype => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series(["a", "b", "c"])

        exp = Series([10, 11, np.nan, "a", "b", "c"])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = Series(["a", "b", "c", 10, 11, np.nan])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # if normal series only contains NaN-likes => not-category
        s1 = Series([10, 11], dtype="category")
        s2 = Series([np.nan, np.nan, np.nan])

        exp = Series([10, 11, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = Series([np.nan, np.nan, np.nan, 10, 11])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
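
    # Sketch of the coercion direction checked above (example values assumed):
    # combining a categorical with a plain Series drops the categorical dtype
    # regardless of operand order, e.g.
    #
    #   pd.concat(
    #       [Series([1, 2], dtype="category"), Series(["a", "b"])],
    #       ignore_index=True,
    #   ).dtype
    #   # -> object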

    def test_concat_categorical_3elem_coercion(self):
        # GH 13524

        # mixed dtypes => not-category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2], dtype="category")
        s3 = Series([1, 2, 1, 2, np.nan])

        exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = Series([4, 5, 6], dtype="category")
        s2 = Series([1, 2, 3], dtype="category")
        s3 = Series([1, 3, 4])

        exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

        # values are not in either category => not-category
        s1 = Series([4, 5, 6], dtype="category")
        s2 = Series([1, 2, 3], dtype="category")
        s3 = Series([10, 11, 12])

        exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

    def test_concat_categorical_multi_coercion(self):
        # GH 13524

        s1 = Series([1, 3], dtype="category")
        s2 = Series([3, 4], dtype="category")
        s3 = Series([2, 3])
        s4 = Series([2, 2], dtype="category")
        s5 = Series([1, np.nan])
        s6 = Series([1, 3, 2], dtype="category")

        # mixed dtype, values are all in categories => not-category
        exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s1.append([s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)

        exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s6.append([s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)

    def test_concat_categorical_ordered(self):
        # GH 13524

        s1 = Series(Categorical([1, 2, np.nan], ordered=True))
        s2 = Series(Categorical([2, 1, 2], ordered=True))

        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)

    def test_concat_categorical_coercion_nan(self):
        # GH 13524

        # some edge cases
        # category + not-category => not category
        s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
        s2 = Series([np.nan, 1])

        exp = Series([np.nan, np.nan, np.nan, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        s1 = Series([1, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan])

        exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # mixed dtype, all nan-likes => not-category
        s1 = Series([np.nan, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan])

        exp = Series([np.nan, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # all category nan-likes => category
        s1 = Series([np.nan, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan], dtype="category")

        exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

    def test_concat_categorical_empty(self):
        # GH 13524

        s1 = Series([], dtype="category")
        s2 = Series([1, 2], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([], dtype="object")

        # different dtype => not-category
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([np.nan, np.nan])

        # empty Series is ignored
        exp = Series([np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
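
    # Edge-case summary of the two tests above: all-NaN inputs keep the
    # categorical dtype only when every operand is categorical; mixing with a
    # plain all-NaN Series falls back to the non-categorical (float64) result,
    # and empty Series are effectively ignored.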

    def test_categorical_concat_append(self):
        cat = Categorical(["a", "b"], categories=["a", "b"])
        vals = [1, 2]
        df = DataFrame({"cats": cat, "vals": vals})
        cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
        vals2 = [1, 2, 1, 2]
        exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))

        tm.assert_frame_equal(pd.concat([df, df]), exp)
        tm.assert_frame_equal(df.append(df), exp)

        # GH 13524 can concat different categories
        cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
        vals3 = [1, 2]
        df_different_categories = DataFrame({"cats": cat3, "vals": vals3})

        res = pd.concat([df, df_different_categories], ignore_index=True)
        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
        tm.assert_frame_equal(res, exp)

        res = df.append(df_different_categories, ignore_index=True)
        tm.assert_frame_equal(res, exp)