"""
Tests for Series.apply, Series.agg/aggregate and Series.map.

NOTE(review): expected dtypes, error messages and deprecation behavior in this
module are pinned to the pandas version under test — confirm against the
installed pandas before tightening any assertion.
"""
from collections import Counter, defaultdict
from itertools import chain

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, isna, timedelta_range
import pandas._testing as tm
from pandas.core.base import SpecificationError


class TestSeriesApply:
    """Tests for elementwise application via Series.apply."""

    def test_series_map_box_timedelta(self):
        # GH#11349
        ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))

        def f(x):
            return x.total_seconds()

        # values must be boxed as Timedelta (have .total_seconds); no asserts
        # on the result — just must not raise
        ser.map(f)
        ser.apply(f)
        DataFrame(ser).applymap(f)

    def test_apply(self, datetime_series):
        with np.errstate(all="ignore"):
            tm.assert_series_equal(
                datetime_series.apply(np.sqrt), np.sqrt(datetime_series)
            )

            # element-wise apply
            import math

            tm.assert_series_equal(
                datetime_series.apply(math.exp), np.exp(datetime_series)
            )

        # empty series
        s = Series(dtype=object, name="foo", index=Index([], name="bar"))
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)

        # check all metadata (GH 9322)
        assert s is not rs
        assert s.index is rs.index
        assert s.dtype == rs.dtype
        assert s.name == rs.name

        # index but no data
        s = Series(index=[1, 2, 3], dtype=np.float64)
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)

    def test_apply_same_length_inference_bug(self):
        s = Series([1, 2])

        # returns a tuple per element; apply must not explode it into columns
        def f(x):
            return (x, x + 1)

        result = s.apply(f)
        expected = s.map(f)
        tm.assert_series_equal(result, expected)

        s = Series([1, 2, 3])
        result = s.apply(f)
        expected = s.map(f)
        tm.assert_series_equal(result, expected)

    def test_apply_dont_convert_dtype(self):
        s = Series(np.random.randn(10))

        def f(x):
            return x if x > 0 else np.nan

        # convert_dtype=False must leave the result as object dtype
        result = s.apply(f, convert_dtype=False)
        assert result.dtype == object

    def test_with_string_args(self, datetime_series):

        for arg in ["sum", "mean", "min", "max", "std"]:
            # a string arg dispatches to the named reduction method
            result = datetime_series.apply(arg)
            expected = getattr(datetime_series, arg)()
            assert result == expected

    def test_apply_args(self):
        s = Series(["foo,bar"])

        # positional args are forwarded to the applied callable
        result = s.apply(str.split, args=(",",))
        assert result[0] == ["foo", "bar"]
        assert isinstance(result[0], list)

    def test_series_map_box_timestamps(self):
        # GH#2689, GH#2627
        ser = Series(pd.date_range("1/1/2000", periods=10))

        def func(x):
            return (x.hour, x.day, x.month)

        # it works!
        ser.map(func)
        ser.apply(func)

    def test_apply_box(self):
        # ufunc will not be boxed. Same test cases as the test_map_box
        vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
        s = Series(vals)
        assert s.dtype == "datetime64[ns]"
        # boxed value must be Timestamp instance
        res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
        exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
        tm.assert_series_equal(res, exp)

        vals = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
        s = Series(vals)
        assert s.dtype == "datetime64[ns, US/Eastern]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
        exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
        s = Series(vals)
        assert s.dtype == "timedelta64[ns]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.days}")
        exp = Series(["Timedelta_1", "Timedelta_2"])
        tm.assert_series_equal(res, exp)

        # period
        vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
        s = Series(vals)
        assert s.dtype == "Period[M]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
        exp = Series(["Period_M", "Period_M"])
        tm.assert_series_equal(res, exp)

    def test_apply_datetimetz(self):
        values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        s = Series(values, name="XX")

        # tz must be preserved through apply
        result = s.apply(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        exp = Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.apply(lambda x: x.hour)
        exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    def test_apply_dict_depr(self):

        tsdf = DataFrame(
            np.random.randn(10, 3),
            columns=["A", "B", "C"],
            index=pd.date_range("1/1/2000", periods=10),
        )
        msg = "nested renamer is not supported"
        with pytest.raises(SpecificationError, match=msg):
            tsdf.A.agg({"foo": ["sum", "mean"]})

    def test_apply_categorical(self):
        values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
        ser = Series(values, name="XX", index=list("abcdefg"))
        result = ser.apply(lambda x: x.lower())

        # should be categorical dtype when the number of categories are
        # the same
        values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
        exp = Series(values, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp.values)

        # mapper collapses to a single value -> falls back to object dtype
        result = ser.apply(lambda x: "A")
        exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        assert result.dtype == object

    @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
    def test_apply_categorical_with_nan_values(self, series):
        # GH 20714 bug fixed in: GH 24275
        s = Series(series, dtype="category")
        result = s.apply(lambda x: x.split("-")[0])
        result = result.astype(object)
        expected = Series(["1", "1", np.NaN], dtype="category")
        expected = expected.astype(object)
        tm.assert_series_equal(result, expected)

    def test_apply_empty_integer_series_with_datetime_index(self):
        # GH 21245
        s = Series([], index=pd.date_range(start="2018-01-01", periods=0), dtype=int)
        result = s.apply(lambda x: x)
        tm.assert_series_equal(result, s)


class TestSeriesAggregate:
    """Tests for Series.agg / Series.aggregate dispatch and error paths."""

    def test_transform(self, string_series):
        # transforming functions

        with np.errstate(all="ignore"):

            f_sqrt = np.sqrt(string_series)
            f_abs = np.abs(string_series)

            # ufunc
            result = string_series.apply(np.sqrt)
            expected = f_sqrt.copy()
            tm.assert_series_equal(result, expected)

            # list-like
            result = string_series.apply([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ["sqrt"]
            tm.assert_frame_equal(result, expected)

            result = string_series.apply(["sqrt"])
            tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["sqrt", "absolute"]
            result = string_series.apply([np.sqrt, np.abs])
            tm.assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["foo", "bar"]
            expected = expected.unstack().rename("series")

            result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
            tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        # we are trying to transform with an aggregator
        msg = "cannot combine transform and aggregation"
        with pytest.raises(ValueError, match=msg):
            with np.errstate(all="ignore"):
                string_series.agg(["sqrt", "max"])

        msg = "cannot perform both aggregation and transformation"
        with pytest.raises(ValueError, match=msg):
            with np.errstate(all="ignore"):
                string_series.agg({"foo": np.sqrt, "bar": "sum"})

    def test_demo(self):
        # demonstration tests
        s = Series(range(6), dtype="int64", name="series")

        result = s.agg(["min", "max"])
        expected = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(result, expected)

        result = s.agg({"foo": "min"})
        expected = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(result, expected)

        # nested renaming
        msg = "nested renamer is not supported"
        with pytest.raises(SpecificationError, match=msg):
            s.agg({"foo": ["min", "max"]})

    def test_multiple_aggregators_with_dict_api(self):

        s = Series(range(6), dtype="int64", name="series")
        # nested renaming
        msg = "nested renamer is not supported"
        with pytest.raises(SpecificationError, match=msg):
            s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        result = string_series.apply(lambda x: str(x))
        expected = string_series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = string_series.apply(str)
        expected = string_series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        # GH 2316
        # .agg with a reducer and a transform, what to do
        result = datetime_series.apply(
            lambda x: Series([x, x ** 2], index=["x", "x^2"])
        )
        expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2})
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(lambda x: Series([x, x ** 2], index=["x", "x^2"]))
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        # this also tests a result set that is all scalars
        expected = string_series.describe()
        result = string_series.apply(
            {
                "count": "count",
                "mean": "mean",
                "std": "std",
                "min": "min",
                "25%": lambda x: x.quantile(0.25),
                "50%": "median",
                "75%": lambda x: x.quantile(0.75),
                "max": "max",
            }
        )
        tm.assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        # reductions with named functions
        result = string_series.agg(["sum", "mean"])
        expected = Series(
            [string_series.sum(), string_series.mean()],
            ["sum", "mean"],
            name=string_series.name,
        )
        tm.assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
        tm.assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            tm.get_cython_table_params(
                Series(dtype=np.float64),
                [
                    ("sum", 0),
                    ("max", np.nan),
                    ("min", np.nan),
                    ("all", True),
                    ("any", False),
                    ("mean", np.nan),
                    ("prod", 1),
                    ("std", np.nan),
                    ("var", np.nan),
                    ("median", np.nan),
                ],
            ),
            tm.get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("sum", 6),
                    ("max", 3),
                    ("min", 1),
                    ("all", True),
                    ("any", True),
                    ("mean", 2),
                    ("prod", 6),
                    ("std", 1),
                    ("var", 1),
                    ("median", 2),
                ],
            ),
            tm.get_cython_table_params(
                Series("a b c".split()),
                [
                    ("sum", "abc"),
                    ("max", "c"),
                    ("min", "a"),
                    ("all", "c"),  # see GH12863
                    ("any", "a"),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, series, func, expected):
        # GH21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        result = series.agg(func)
        if tm.is_number(expected):
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            tm.get_cython_table_params(
                Series(dtype=np.float64),
                [
                    ("cumprod", Series([], Index([]), dtype=np.float64)),
                    ("cumsum", Series([], Index([]), dtype=np.float64)),
                ],
            ),
            tm.get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("cumprod", Series([np.nan, 1, 2, 6])),
                    ("cumsum", Series([np.nan, 1, 3, 6])),
                ],
            ),
            tm.get_cython_table_params(
                Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
            ),
        ),
    )
    def test_agg_cython_table_transform(self, series, func, expected):
        # GH21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        result = series.agg(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            tm.get_cython_table_params(
                Series("a b c".split()),
                [
                    ("mean", TypeError),  # mean raises TypeError
                    ("prod", TypeError),
                    ("std", TypeError),
                    ("var", TypeError),
                    ("median", TypeError),
                    ("cumprod", TypeError),
                ],
            )
        ),
    )
    def test_agg_cython_table_raises(self, series, func, expected):
        # GH21224
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)

    def test_series_apply_no_suffix_index(self):
        # GH36189
        s = Series([4] * 3)
        result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
        expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])

        tm.assert_series_equal(result, expected)


class TestSeriesMap:
    """Tests for Series.map with callables, dicts, Series and Mapping mappers."""

    def test_map(self, datetime_series):
        index, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        merged = target.map(source)

        for k, v in merged.items():
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.items():
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        exp = Series(
            pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
        )
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(a.map(c), exp)

    def test_map_empty(self, index):
        if isinstance(index, MultiIndex):
            pytest.skip("Initializing a Series from a MultiIndex is not supported")

        s = Series(index)
        result = s.map({})

        expected = Series(np.nan, index=s.index)
        tm.assert_series_equal(result, expected)

    def test_map_compat(self):
        # related GH 8024
        s = Series([True, True, False], index=[1, 2, 3])
        result = s.map({True: "foo", False: "bar"})
        expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_map_int(self):
        left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
        right = Series({1: 11, 2: 22, 3: 33})

        assert left.dtype == np.float_
        assert issubclass(right.dtype.type, np.integer)

        # unmatched keys ("d" maps to 4, absent from right) become NaN
        merged = left.map(right)
        assert merged.dtype == np.float_
        assert isna(merged["d"])
        assert not isna(merged["c"])

    def test_map_type_inference(self):
        s = Series(range(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        assert issubclass(s2.dtype.type, np.integer)

    def test_map_decimal(self, string_series):
        from decimal import Decimal

        result = string_series.map(lambda x: Decimal(str(x)))
        assert result.dtype == np.object_
        assert isinstance(result[0], Decimal)

    def test_map_na_exclusion(self):
        s = Series([1.5, np.nan, 3, np.nan, 5])

        # na_action="ignore" must skip NaNs instead of passing them to the func
        result = s.map(lambda x: x * 2, na_action="ignore")
        exp = s * 2
        tm.assert_series_equal(result, exp)

    def test_map_dict_with_tuple_keys(self):
        """
        Due to new MultiIndex-ing behaviour in v0.14.0,
        dicts with tuple keys passed to map were being
        converted to a multi-index, preventing tuple values
        from being mapped properly.
        """
        # GH 18496
        df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})
        label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}

        df["labels"] = df["a"].map(label_mappings)
        df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
        # All labels should be filled now
        tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)

    def test_map_counter(self):
        # Counter returns 0 (not KeyError) for missing keys; "a" -> 0
        s = Series(["a", "b", "c"], index=[1, 2, 3])
        counter = Counter()
        counter["b"] = 5
        counter["c"] += 1
        result = s.map(counter)
        expected = Series([0, 5, 1], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_map_defaultdict(self):
        s = Series([1, 2, 3], index=["a", "b", "c"])
        default_dict = defaultdict(lambda: "blank")
        default_dict[1] = "stuff"
        result = s.map(default_dict)
        expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
        tm.assert_series_equal(result, expected)

    def test_map_dict_na_key(self):
        # https://github.com/pandas-dev/pandas/issues/17648
        # Checks that np.nan key is appropriately mapped
        s = Series([1, 2, np.nan])
        expected = Series(["a", "b", "c"])
        result = s.map({1: "a", 2: "b", np.nan: "c"})
        tm.assert_series_equal(result, expected)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """

        class DictWithMissing(dict):
            def __missing__(self, key):
                return "missing"

        s = Series([1, 2, 3])
        dictionary = DictWithMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series(["missing", "missing", "three"])
        tm.assert_series_equal(result, expected)

    def test_map_dict_subclass_without_missing(self):
        class DictWithoutMissing(dict):
            pass

        s = Series([1, 2, 3])
        dictionary = DictWithoutMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_abc_mapping(self, non_dict_mapping_subclass):
        # https://github.com/pandas-dev/pandas/issues/29733
        # Check collections.abc.Mapping support as mapper for Series.map
        s = Series([1, 2, 3])
        not_a_dictionary = non_dict_mapping_subclass({3: "three"})
        result = s.map(not_a_dictionary)
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_abc_mapping_with_missing(self, non_dict_mapping_subclass):
        # https://github.com/pandas-dev/pandas/issues/29733
        # Check collections.abc.Mapping support as mapper for Series.map
        class NonDictMappingWithMissing(non_dict_mapping_subclass):
            def __missing__(self, key):
                return "missing"

        s = Series([1, 2, 3])
        not_a_dictionary = NonDictMappingWithMissing({3: "three"})
        result = s.map(not_a_dictionary)
        # __missing__ is a dict concept, not a Mapping concept,
        # so it should not change the result!
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_box(self):
        vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
        s = Series(vals)
        assert s.dtype == "datetime64[ns]"
        # boxed value must be Timestamp instance
        res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
        exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
        tm.assert_series_equal(res, exp)

        vals = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
        s = Series(vals)
        assert s.dtype == "datetime64[ns, US/Eastern]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
        exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
        s = Series(vals)
        assert s.dtype == "timedelta64[ns]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.days}")
        exp = Series(["Timedelta_1", "Timedelta_2"])
        tm.assert_series_equal(res, exp)

        # period
        vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
        s = Series(vals)
        assert s.dtype == "Period[M]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
        exp = Series(["Period_M", "Period_M"])
        tm.assert_series_equal(res, exp)

    def test_map_categorical(self):
        values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
        s = Series(values, name="XX", index=list("abcdefg"))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(
            list("abbabcd"), categories=list("dcba"), ordered=True
        )
        exp = Series(exp_values, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: "A")
        exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        assert result.dtype == object

        # na_action not yet supported for categorical dtype
        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

    def test_map_datetimetz(self):
        values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        s = Series(values, name="XX")

        # keep tz
        result = s.map(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        exp = Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.map(lambda x: x.hour)
        exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        # na_action not yet supported for tz-aware dtype
        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "vals,mapping,exp",
        [
            (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
            (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
            (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
        ],
    )
    def test_map_missing_mixed(self, vals, mapping, exp):
        # GH20495
        s = Series(vals + [np.nan])
        result = s.map(mapping)

        tm.assert_series_equal(result, Series(exp))

    @pytest.mark.parametrize(
        "dti,exp",
        [
            (
                Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
                DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
            ),
            (
                tm.makeTimeSeries(nper=30),
                DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
            ),
        ],
    )
    def test_apply_series_on_date_time_index_aware_series(self, dti, exp):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        index = dti.tz_localize("UTC").index
        result = Series(index).apply(lambda x: Series([1, 2]))
        tm.assert_frame_equal(result, exp)

    def test_apply_scaler_on_date_time_index_aware_series(self):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
        result = Series(series.index).apply(lambda x: 1)
        tm.assert_series_equal(result, Series(np.ones(30), dtype="int64"))

    def test_map_float_to_string_precision(self):
        # GH 13228
        ser = Series(1 / 3)
        result = ser.map(lambda val: str(val)).to_dict()
        expected = {0: "0.3333333333333333"}
        assert result == expected

    def test_map_with_invalid_na_action_raises(self):
        # https://github.com/pandas-dev/pandas/issues/32815
        s = Series([1, 2, 3])
        msg = "na_action must either be 'ignore' or None"
        with pytest.raises(ValueError, match=msg):
            s.map(lambda x: x, na_action="____")

    def test_apply_to_timedelta(self):
        list_of_valid_strings = ["00:00:01", "00:00:02"]
        a = pd.to_timedelta(list_of_valid_strings)
        b = Series(list_of_valid_strings).apply(pd.to_timedelta)
        # FIXME: dont leave commented-out
        # Can't compare until apply on a Series gives the correct dtype
        # assert_series_equal(a, b)

        list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]

        a = pd.to_timedelta(list_of_strings)  # noqa
        b = Series(list_of_strings).apply(pd.to_timedelta)  # noqa
        # Can't compare until apply on a Series gives the correct dtype
        # assert_series_equal(a, b)